; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32V
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVE32F
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \
; RUN: -riscv-enable-sink-fold -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVE32F
declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>)
define <1 x i8> @mgather_v1i8(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i8> %passthru) {
; RV32V-LABEL: mgather_v1i8:
; RV32V-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV64V-LABEL: mgather_v1i8:
; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-LABEL: mgather_v1i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v1i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB0_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vle8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB0_2: # %else
; RV64ZVE32F-NEXT: ret
  %v = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> %ptrs, i32 1, <1 x i1> %m, <1 x i8> %passthru)
declare <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i8>)
define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV64V-LABEL: mgather_v2i8:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-LABEL: mgather_v2i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
; RV64ZVE32F-NEXT: .LBB1_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB1_3: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
define <2 x i16> @mgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i16:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT: vsext.vf2 v8, v9
; RV64V-LABEL: mgather_v2i8_sextload_v2i16:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT: vsext.vf2 v8, v9
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB2_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB2_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB2_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB2_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vsext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = sext <2 x i8> %v to <2 x i16>
define <2 x i16> @mgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i16:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT: vzext.vf2 v8, v9
; RV64V-LABEL: mgather_v2i8_zextload_v2i16:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT: vzext.vf2 v8, v9
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB3_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB3_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB3_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB3_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vzext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = zext <2 x i8> %v to <2 x i16>
define <2 x i32> @mgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i32:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vsext.vf4 v8, v9
; RV64V-LABEL: mgather_v2i8_sextload_v2i32:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vsext.vf4 v8, v9
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v8, v9
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB4_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB4_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB4_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB4_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vsext.vf4 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = sext <2 x i8> %v to <2 x i32>
define <2 x i32> @mgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i32:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vzext.vf4 v8, v9
; RV64V-LABEL: mgather_v2i8_zextload_v2i32:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vzext.vf4 v8, v9
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vzext.vf4 v8, v9
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB5_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB5_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB5_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB5_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vzext.vf4 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = zext <2 x i8> %v to <2 x i32>
define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i64:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vsext.vf8 v8, v9
; RV64V-LABEL: mgather_v2i8_sextload_v2i64:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vsext.vf8 v8, v9
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: srai a2, a1, 31
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: srai a4, a3, 31
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 12(a0)
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB6_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB6_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB6_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB6_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = sext <2 x i8> %v to <2 x i64>
define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i64:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vzext.vf8 v8, v9
; RV64V-LABEL: mgather_v2i8_zextload_v2i64:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vzext.vf8 v8, v9
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: andi a1, a1, 255
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: andi a2, a2, 255
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB7_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB7_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB7_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB7_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
  %ev = zext <2 x i8> %v to <2 x i64>
declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) {
; RV32-LABEL: mgather_v4i8:
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV64V-LABEL: mgather_v4i8:
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64ZVE32F-LABEL: mgather_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB8_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB8_6
; RV64ZVE32F-NEXT: .LBB8_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB8_7
; RV64ZVE32F-NEXT: .LBB8_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB8_8
; RV64ZVE32F-NEXT: .LBB8_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB8_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB8_2
; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB8_3
; RV64ZVE32F-NEXT: .LBB8_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB8_4
; RV64ZVE32F-NEXT: .LBB8_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
  %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %m, <4 x i8> %passthru)
define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
; RV32-LABEL: mgather_truemask_v4i8:
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8
; RV32-NEXT: vmv1r.v v8, v9
; RV64V-LABEL: mgather_truemask_v4i8:
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv1r.v v8, v10
; RV64ZVE32F-LABEL: mgather_truemask_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v9
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: beqz zero, .LBB9_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB9_6
; RV64ZVE32F-NEXT: .LBB9_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB9_7
; RV64ZVE32F-NEXT: .LBB9_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB9_8
; RV64ZVE32F-NEXT: .LBB9_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB9_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB9_2
; RV64ZVE32F-NEXT: .LBB9_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB9_3
; RV64ZVE32F-NEXT: .LBB9_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB9_4
; RV64ZVE32F-NEXT: .LBB9_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
  %mhead = insertelement <4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
  %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %mtrue, <4 x i8> %passthru)
define <4 x i8> @mgather_falsemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i8:
; RV32-NEXT: vmv1r.v v8, v9
; RV64V-LABEL: mgather_falsemask_v4i8:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64ZVE32F-LABEL: mgather_falsemask_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
  %v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> zeroinitializer, <4 x i8> %passthru)
declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) {
; RV32-LABEL: mgather_v8i8:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v10
; RV64V-LABEL: mgather_v8i8:
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v12
; RV64ZVE32F-LABEL: mgather_v8i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB11_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB11_10
; RV64ZVE32F-NEXT: .LBB11_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB11_11
; RV64ZVE32F-NEXT: .LBB11_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB11_12
; RV64ZVE32F-NEXT: .LBB11_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB11_13
; RV64ZVE32F-NEXT: .LBB11_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB11_14
; RV64ZVE32F-NEXT: .LBB11_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB11_15
; RV64ZVE32F-NEXT: .LBB11_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB11_16
; RV64ZVE32F-NEXT: .LBB11_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB11_3
; RV64ZVE32F-NEXT: .LBB11_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB11_4
; RV64ZVE32F-NEXT: .LBB11_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB11_5
; RV64ZVE32F-NEXT: .LBB11_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB11_6
; RV64ZVE32F-NEXT: .LBB11_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB11_7
; RV64ZVE32F-NEXT: .LBB11_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB11_8
; RV64ZVE32F-NEXT: .LBB11_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
  %v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV64V-LABEL: mgather_baseidx_v8i8:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB12_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB12_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB12_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB12_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB12_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB12_13
; RV64ZVE32F-NEXT: .LBB12_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB12_14
; RV64ZVE32F-NEXT: .LBB12_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB12_9
; RV64ZVE32F-NEXT: .LBB12_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB12_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB12_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB12_16
; RV64ZVE32F-NEXT: .LBB12_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB12_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB12_6
; RV64ZVE32F-NEXT: .LBB12_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB12_7
; RV64ZVE32F-NEXT: .LBB12_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB12_8
; RV64ZVE32F-NEXT: j .LBB12_9
; RV64ZVE32F-NEXT: .LBB12_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB12_11
; RV64ZVE32F-NEXT: .LBB12_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  %v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
declare <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i16>)
define <1 x i16> @mgather_v1i16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i16> %passthru) {
; RV32V-LABEL: mgather_v1i16:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV64V-LABEL: mgather_v1i16:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-LABEL: mgather_v1i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v1i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB13_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB13_2: # %else
; RV64ZVE32F-NEXT: ret
  %v = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x i16> %passthru)
declare <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV64V-LABEL: mgather_v2i16:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-LABEL: mgather_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB14_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB14_4
; RV64ZVE32F-NEXT: .LBB14_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB14_3: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB14_2
; RV64ZVE32F-NEXT: .LBB14_4: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
define <2 x i32> @mgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_sextload_v2i32:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vsext.vf2 v8, v9
; RV64V-LABEL: mgather_v2i16_sextload_v2i32:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vsext.vf2 v8, v9
; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB15_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB15_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB15_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB15_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vsext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = sext <2 x i16> %v to <2 x i32>
define <2 x i32> @mgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_zextload_v2i32:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vzext.vf2 v8, v9
; RV64V-LABEL: mgather_v2i16_zextload_v2i32:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vzext.vf2 v8, v9
; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB16_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB16_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB16_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB16_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vzext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = zext <2 x i16> %v to <2 x i32>
define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_sextload_v2i64:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vsext.vf4 v8, v9
; RV64V-LABEL: mgather_v2i16_sextload_v2i64:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vsext.vf4 v8, v9
; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: srai a2, a1, 31
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: srai a4, a3, 31
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 12(a0)
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB17_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB17_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB17_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB17_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = sext <2 x i16> %v to <2 x i64>
define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_zextload_v2i64:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vzext.vf4 v8, v9
; RV64V-LABEL: mgather_v2i16_zextload_v2i64:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vzext.vf4 v8, v9
; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lui a2, 16
; RV32ZVE32F-NEXT: addi a2, a2, -1
; RV32ZVE32F-NEXT: and a1, a1, a2
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: and a2, a3, a2
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: ret
; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB18_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB18_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB18_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB18_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: addiw a1, a1, -1
; RV64ZVE32F-NEXT: and a0, a0, a1
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a1, a2, a1
; RV64ZVE32F-NEXT: ret
  %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
  %ev = zext <2 x i16> %v to <2 x i64>
declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthru) {
; RV32-LABEL: mgather_v4i16:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV64V-LABEL: mgather_v4i16:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64ZVE32F-LABEL: mgather_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB19_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB19_6
; RV64ZVE32F-NEXT: .LBB19_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB19_7
; RV64ZVE32F-NEXT: .LBB19_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB19_8
; RV64ZVE32F-NEXT: .LBB19_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB19_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB19_2
; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB19_3
; RV64ZVE32F-NEXT: .LBB19_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB19_4
; RV64ZVE32F-NEXT: .LBB19_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
  %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x i16> %passthru)
define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
; RV32-LABEL: mgather_truemask_v4i16:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8
; RV32-NEXT: vmv1r.v v8, v9
; RV64V-LABEL: mgather_truemask_v4i16:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv1r.v v8, v10
; RV64ZVE32F-LABEL: mgather_truemask_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v9
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: beqz zero, .LBB20_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB20_6
; RV64ZVE32F-NEXT: .LBB20_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB20_7
; RV64ZVE32F-NEXT: .LBB20_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB20_8
; RV64ZVE32F-NEXT: .LBB20_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB20_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB20_2
; RV64ZVE32F-NEXT: .LBB20_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB20_3
; RV64ZVE32F-NEXT: .LBB20_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB20_4
; RV64ZVE32F-NEXT: .LBB20_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
  %mhead = insertelement <4 x i1> poison, i1 1, i32 0
  %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
  %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue, <4 x i16> %passthru)
define <4 x i16> @mgather_falsemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i16:
; RV32-NEXT: vmv1r.v v8, v9
; RV64V-LABEL: mgather_falsemask_v4i16:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64ZVE32F-LABEL: mgather_falsemask_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
  %v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x i16> %passthru)
declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_v8i16:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV64V-LABEL: mgather_v8i16:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64ZVE32F-LABEL: mgather_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB22_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB22_10
; RV64ZVE32F-NEXT: .LBB22_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB22_11
; RV64ZVE32F-NEXT: .LBB22_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB22_12
; RV64ZVE32F-NEXT: .LBB22_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB22_13
; RV64ZVE32F-NEXT: .LBB22_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB22_14
; RV64ZVE32F-NEXT: .LBB22_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB22_15
; RV64ZVE32F-NEXT: .LBB22_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB22_16
; RV64ZVE32F-NEXT: .LBB22_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB22_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB22_2
; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB22_3
; RV64ZVE32F-NEXT: .LBB22_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB22_4
; RV64ZVE32F-NEXT: .LBB22_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB22_5
; RV64ZVE32F-NEXT: .LBB22_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB22_6
; RV64ZVE32F-NEXT: .LBB22_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB22_7
; RV64ZVE32F-NEXT: .LBB22_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB22_8
; RV64ZVE32F-NEXT: .LBB22_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8_v8i16:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV64V-LABEL: mgather_baseidx_v8i8_v8i16:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB23_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB23_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB23_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB23_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB23_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB23_13
; RV64ZVE32F-NEXT: .LBB23_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB23_14
; RV64ZVE32F-NEXT: .LBB23_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB23_9
; RV64ZVE32F-NEXT: .LBB23_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB23_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB23_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB23_16
; RV64ZVE32F-NEXT: .LBB23_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB23_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB23_6
; RV64ZVE32F-NEXT: .LBB23_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB23_7
; RV64ZVE32F-NEXT: .LBB23_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB23_8
; RV64ZVE32F-NEXT: j .LBB23_9
; RV64ZVE32F-NEXT: .LBB23_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
1557 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
1558 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1559 ; RV64ZVE32F-NEXT: beqz a1, .LBB23_11
1560 ; RV64ZVE32F-NEXT: .LBB23_16: # %cond.load19
1561 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1562 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1563 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
1564 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1565 ; RV64ZVE32F-NEXT: add a0, a0, a1
1566 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1567 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1568 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
1569 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1570 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
1571 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
1572 ; RV64ZVE32F-NEXT: ret
1573 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
1574 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
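; Same addressing as above, but the i8 indices are explicitly sign-extended to
; i16 before the getelementptr; codegen is expected to match the previous test.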
1578 define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
1579 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i16:
1581 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1582 ; RV32-NEXT: vsext.vf4 v10, v8
1583 ; RV32-NEXT: vadd.vv v10, v10, v10
1584 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1585 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
1586 ; RV32-NEXT: vmv.v.v v8, v9
1589 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i16:
1591 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1592 ; RV64V-NEXT: vsext.vf8 v12, v8
1593 ; RV64V-NEXT: vadd.vv v12, v12, v12
1594 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1595 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
1596 ; RV64V-NEXT: vmv.v.v v8, v9
1599 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i16:
1600 ; RV64ZVE32F: # %bb.0:
1601 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1602 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1603 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1604 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_2
1605 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
1606 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1607 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1608 ; RV64ZVE32F-NEXT: add a2, a0, a2
1609 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1610 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
1611 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1612 ; RV64ZVE32F-NEXT: .LBB24_2: # %else
1613 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1614 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_4
1615 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
1616 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1617 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1618 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1619 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1620 ; RV64ZVE32F-NEXT: add a2, a0, a2
1621 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1622 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1623 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1624 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
1625 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
1626 ; RV64ZVE32F-NEXT: .LBB24_4: # %else2
1627 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1628 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
1629 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1630 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1631 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
1632 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_12
1633 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
1634 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1635 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_13
1636 ; RV64ZVE32F-NEXT: .LBB24_6: # %else8
1637 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1638 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_14
1639 ; RV64ZVE32F-NEXT: .LBB24_7: # %else11
1640 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1641 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_9
1642 ; RV64ZVE32F-NEXT: .LBB24_8: # %cond.load13
1643 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1644 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
1645 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1646 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1647 ; RV64ZVE32F-NEXT: add a2, a0, a2
1648 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1649 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1650 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1651 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
1652 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
1653 ; RV64ZVE32F-NEXT: .LBB24_9: # %else14
1654 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1655 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1656 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
1657 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_15
1658 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
1659 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1660 ; RV64ZVE32F-NEXT: bnez a1, .LBB24_16
1661 ; RV64ZVE32F-NEXT: .LBB24_11: # %else20
1662 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
1663 ; RV64ZVE32F-NEXT: ret
1664 ; RV64ZVE32F-NEXT: .LBB24_12: # %cond.load4
1665 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1666 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1667 ; RV64ZVE32F-NEXT: add a2, a0, a2
1668 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1669 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1670 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
1671 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
1672 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
1673 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1674 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_6
1675 ; RV64ZVE32F-NEXT: .LBB24_13: # %cond.load7
1676 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1677 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1678 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1679 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1680 ; RV64ZVE32F-NEXT: add a2, a0, a2
1681 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1682 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1683 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1684 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
1685 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
1686 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1687 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_7
1688 ; RV64ZVE32F-NEXT: .LBB24_14: # %cond.load10
1689 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1690 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1691 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1692 ; RV64ZVE32F-NEXT: add a2, a0, a2
1693 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1694 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
1695 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1696 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
1697 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1698 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_8
1699 ; RV64ZVE32F-NEXT: j .LBB24_9
1700 ; RV64ZVE32F-NEXT: .LBB24_15: # %cond.load16
1701 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1702 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1703 ; RV64ZVE32F-NEXT: add a2, a0, a2
1704 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1705 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1706 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1707 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
1708 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
1709 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1710 ; RV64ZVE32F-NEXT: beqz a1, .LBB24_11
1711 ; RV64ZVE32F-NEXT: .LBB24_16: # %cond.load19
1712 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1713 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1714 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
1715 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1716 ; RV64ZVE32F-NEXT: add a0, a0, a1
1717 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1718 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1719 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
1720 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1721 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
1722 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
1723 ; RV64ZVE32F-NEXT: ret
1724 %eidxs = sext <8 x i8> %idxs to <8 x i16>
1725 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
1726 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
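; Zero-extended i8 indices: the offsets are known to fit in 16 bits, so the
; vector lowerings widen with vwaddu.vv and gather through vluxei16, while the
; scalarized RV64ZVE32F path masks each extracted index with 255.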
1730 define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
1731 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i16:
1733 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1734 ; RV32-NEXT: vwaddu.vv v10, v8, v8
1735 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1736 ; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
1737 ; RV32-NEXT: vmv.v.v v8, v9
1740 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i16:
1742 ; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
1743 ; RV64V-NEXT: vwaddu.vv v10, v8, v8
1744 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1745 ; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t
1746 ; RV64V-NEXT: vmv.v.v v8, v9
1749 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i16:
1750 ; RV64ZVE32F: # %bb.0:
1751 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1752 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1753 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1754 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_2
1755 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
1756 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1757 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1758 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1759 ; RV64ZVE32F-NEXT: add a2, a0, a2
1760 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1761 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
1762 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1763 ; RV64ZVE32F-NEXT: .LBB25_2: # %else
1764 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1765 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_4
1766 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
1767 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1768 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1769 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1770 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1771 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1772 ; RV64ZVE32F-NEXT: add a2, a0, a2
1773 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1774 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1775 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1776 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
1777 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
1778 ; RV64ZVE32F-NEXT: .LBB25_4: # %else2
1779 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
1780 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
1781 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1782 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1783 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
1784 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_12
1785 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
1786 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1787 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_13
1788 ; RV64ZVE32F-NEXT: .LBB25_6: # %else8
1789 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1790 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_14
1791 ; RV64ZVE32F-NEXT: .LBB25_7: # %else11
1792 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1793 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_9
1794 ; RV64ZVE32F-NEXT: .LBB25_8: # %cond.load13
1795 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1796 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
1797 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1798 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1799 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1800 ; RV64ZVE32F-NEXT: add a2, a0, a2
1801 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1802 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1803 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1804 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
1805 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
1806 ; RV64ZVE32F-NEXT: .LBB25_9: # %else14
1807 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1808 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1809 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
1810 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_15
1811 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
1812 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1813 ; RV64ZVE32F-NEXT: bnez a1, .LBB25_16
1814 ; RV64ZVE32F-NEXT: .LBB25_11: # %else20
1815 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
1816 ; RV64ZVE32F-NEXT: ret
1817 ; RV64ZVE32F-NEXT: .LBB25_12: # %cond.load4
1818 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1819 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1820 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1821 ; RV64ZVE32F-NEXT: add a2, a0, a2
1822 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1823 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1824 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
1825 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
1826 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
1827 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1828 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_6
1829 ; RV64ZVE32F-NEXT: .LBB25_13: # %cond.load7
1830 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1831 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1832 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1833 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1834 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1835 ; RV64ZVE32F-NEXT: add a2, a0, a2
1836 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1837 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1838 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1839 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
1840 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
1841 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1842 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_7
1843 ; RV64ZVE32F-NEXT: .LBB25_14: # %cond.load10
1844 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1845 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1846 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1847 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1848 ; RV64ZVE32F-NEXT: add a2, a0, a2
1849 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1850 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
1851 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1852 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
1853 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1854 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_8
1855 ; RV64ZVE32F-NEXT: j .LBB25_9
1856 ; RV64ZVE32F-NEXT: .LBB25_15: # %cond.load16
1857 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1858 ; RV64ZVE32F-NEXT: andi a2, a2, 255
1859 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1860 ; RV64ZVE32F-NEXT: add a2, a0, a2
1861 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1862 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1863 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1864 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
1865 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
1866 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1867 ; RV64ZVE32F-NEXT: beqz a1, .LBB25_11
1868 ; RV64ZVE32F-NEXT: .LBB25_16: # %cond.load19
1869 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1870 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1871 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
1872 ; RV64ZVE32F-NEXT: andi a1, a1, 255
1873 ; RV64ZVE32F-NEXT: slli a1, a1, 1
1874 ; RV64ZVE32F-NEXT: add a0, a0, a1
1875 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
1876 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1877 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
1878 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1879 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
1880 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
1881 ; RV64ZVE32F-NEXT: ret
1882 %eidxs = zext <8 x i8> %idxs to <8 x i16>
1883 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
1884 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
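; Native i16 indices into an i16 base: RV32 widens and doubles the indices with
; a single vwadd.vv and gathers via vluxei32; RV64V still needs e64 indices, and
; RV64ZVE32F scalarizes as before.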
1888 define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
1889 ; RV32-LABEL: mgather_baseidx_v8i16:
1891 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
1892 ; RV32-NEXT: vwadd.vv v10, v8, v8
1893 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
1894 ; RV32-NEXT: vmv.v.v v8, v9
1897 ; RV64V-LABEL: mgather_baseidx_v8i16:
1899 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
1900 ; RV64V-NEXT: vsext.vf4 v12, v8
1901 ; RV64V-NEXT: vadd.vv v12, v12, v12
1902 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
1903 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
1904 ; RV64V-NEXT: vmv.v.v v8, v9
1907 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16:
1908 ; RV64ZVE32F: # %bb.0:
1909 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
1910 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
1911 ; RV64ZVE32F-NEXT: andi a2, a1, 1
1912 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_2
1913 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
1914 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
1915 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1916 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1917 ; RV64ZVE32F-NEXT: add a2, a0, a2
1918 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1919 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
1920 ; RV64ZVE32F-NEXT: .LBB26_2: # %else
1921 ; RV64ZVE32F-NEXT: andi a2, a1, 2
1922 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_4
1923 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
1924 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1925 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
1926 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1927 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1928 ; RV64ZVE32F-NEXT: add a2, a0, a2
1929 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1930 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
1931 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
1932 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
1933 ; RV64ZVE32F-NEXT: .LBB26_4: # %else2
1934 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
1935 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
1936 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1937 ; RV64ZVE32F-NEXT: andi a2, a1, 4
1938 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
1939 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_12
1940 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
1941 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1942 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_13
1943 ; RV64ZVE32F-NEXT: .LBB26_6: # %else8
1944 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1945 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_14
1946 ; RV64ZVE32F-NEXT: .LBB26_7: # %else11
1947 ; RV64ZVE32F-NEXT: andi a2, a1, 32
1948 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_9
1949 ; RV64ZVE32F-NEXT: .LBB26_8: # %cond.load13
1950 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1951 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
1952 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1953 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1954 ; RV64ZVE32F-NEXT: add a2, a0, a2
1955 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1956 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1957 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
1958 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
1959 ; RV64ZVE32F-NEXT: .LBB26_9: # %else14
1960 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
1961 ; RV64ZVE32F-NEXT: andi a2, a1, 64
1962 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
1963 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_15
1964 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
1965 ; RV64ZVE32F-NEXT: andi a1, a1, -128
1966 ; RV64ZVE32F-NEXT: bnez a1, .LBB26_16
1967 ; RV64ZVE32F-NEXT: .LBB26_11: # %else20
1968 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
1969 ; RV64ZVE32F-NEXT: ret
1970 ; RV64ZVE32F-NEXT: .LBB26_12: # %cond.load4
1971 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1972 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1973 ; RV64ZVE32F-NEXT: add a2, a0, a2
1974 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1975 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
1976 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
1977 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
1978 ; RV64ZVE32F-NEXT: andi a2, a1, 8
1979 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_6
1980 ; RV64ZVE32F-NEXT: .LBB26_13: # %cond.load7
1981 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
1982 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
1983 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
1984 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1985 ; RV64ZVE32F-NEXT: add a2, a0, a2
1986 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1987 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1988 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
1989 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
1990 ; RV64ZVE32F-NEXT: andi a2, a1, 16
1991 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_7
1992 ; RV64ZVE32F-NEXT: .LBB26_14: # %cond.load10
1993 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
1994 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
1995 ; RV64ZVE32F-NEXT: slli a2, a2, 1
1996 ; RV64ZVE32F-NEXT: add a2, a0, a2
1997 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
1998 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
1999 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
2000 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2001 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_8
2002 ; RV64ZVE32F-NEXT: j .LBB26_9
2003 ; RV64ZVE32F-NEXT: .LBB26_15: # %cond.load16
2004 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2005 ; RV64ZVE32F-NEXT: slli a2, a2, 1
2006 ; RV64ZVE32F-NEXT: add a2, a0, a2
2007 ; RV64ZVE32F-NEXT: lh a2, 0(a2)
2008 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2009 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
2010 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
2011 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2012 ; RV64ZVE32F-NEXT: beqz a1, .LBB26_11
2013 ; RV64ZVE32F-NEXT: .LBB26_16: # %cond.load19
2014 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
2015 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2016 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2017 ; RV64ZVE32F-NEXT: slli a1, a1, 1
2018 ; RV64ZVE32F-NEXT: add a0, a0, a1
2019 ; RV64ZVE32F-NEXT: lh a0, 0(a0)
2020 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2021 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2022 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
2023 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
2024 ; RV64ZVE32F-NEXT: ret
2025 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
2026 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
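; <N x i32> gathers. For a single element RV64ZVE32F only needs to test the
; mask with vfirst.m and, when the bit is set, perform an unmasked vle32 from
; the scalar pointer.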
2030 declare <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i32>)
2032 define <1 x i32> @mgather_v1i32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i32> %passthru) {
2033 ; RV32V-LABEL: mgather_v1i32:
2035 ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
2036 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
2037 ; RV32V-NEXT: vmv1r.v v8, v9
2040 ; RV64V-LABEL: mgather_v1i32:
2042 ; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
2043 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
2044 ; RV64V-NEXT: vmv1r.v v8, v9
2047 ; RV32ZVE32F-LABEL: mgather_v1i32:
2048 ; RV32ZVE32F: # %bb.0:
2049 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
2050 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
2051 ; RV32ZVE32F-NEXT: vmv.v.v v8, v9
2052 ; RV32ZVE32F-NEXT: ret
2054 ; RV64ZVE32F-LABEL: mgather_v1i32:
2055 ; RV64ZVE32F: # %bb.0:
2056 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
2057 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
2058 ; RV64ZVE32F-NEXT: bnez a1, .LBB27_2
2059 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2060 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2061 ; RV64ZVE32F-NEXT: vle32.v v8, (a0)
2062 ; RV64ZVE32F-NEXT: .LBB27_2: # %else
2063 ; RV64ZVE32F-NEXT: ret
2064 %v = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x i32> %passthru)
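; Two-element i32 gather: the scalar fallback tests each mask bit and inserts
; the loaded words with vmv.s.x / vslideup.vi.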
2068 declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
2070 define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
2071 ; RV32V-LABEL: mgather_v2i32:
2073 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2074 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
2075 ; RV32V-NEXT: vmv1r.v v8, v9
2078 ; RV64V-LABEL: mgather_v2i32:
2080 ; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2081 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
2082 ; RV64V-NEXT: vmv1r.v v8, v9
2085 ; RV32ZVE32F-LABEL: mgather_v2i32:
2086 ; RV32ZVE32F: # %bb.0:
2087 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
2088 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
2089 ; RV32ZVE32F-NEXT: vmv.v.v v8, v9
2090 ; RV32ZVE32F-NEXT: ret
2092 ; RV64ZVE32F-LABEL: mgather_v2i32:
2093 ; RV64ZVE32F: # %bb.0:
2094 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2095 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
2096 ; RV64ZVE32F-NEXT: andi a3, a2, 1
2097 ; RV64ZVE32F-NEXT: bnez a3, .LBB28_3
2098 ; RV64ZVE32F-NEXT: # %bb.1: # %else
2099 ; RV64ZVE32F-NEXT: andi a2, a2, 2
2100 ; RV64ZVE32F-NEXT: bnez a2, .LBB28_4
2101 ; RV64ZVE32F-NEXT: .LBB28_2: # %else2
2102 ; RV64ZVE32F-NEXT: ret
2103 ; RV64ZVE32F-NEXT: .LBB28_3: # %cond.load
2104 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2105 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2106 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2107 ; RV64ZVE32F-NEXT: andi a2, a2, 2
2108 ; RV64ZVE32F-NEXT: beqz a2, .LBB28_2
2109 ; RV64ZVE32F-NEXT: .LBB28_4: # %cond.load1
2110 ; RV64ZVE32F-NEXT: lw a0, 0(a1)
2111 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2112 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
2113 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
2114 ; RV64ZVE32F-NEXT: ret
2115 %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
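; Gather of <2 x i32> immediately sign-extended to <2 x i64>. RV32ZVE32F returns
; the i64 result indirectly through memory, storing the low words with vse32 and
; the srai-computed high words with sw; RV64ZVE32F reads the lanes back with
; vmv.x.s, which already sign-extends e32 elements to XLEN.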
2119 define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
2120 ; RV32V-LABEL: mgather_v2i32_sextload_v2i64:
2122 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2123 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
2124 ; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
2125 ; RV32V-NEXT: vsext.vf2 v8, v9
2128 ; RV64V-LABEL: mgather_v2i32_sextload_v2i64:
2130 ; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2131 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
2132 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
2133 ; RV64V-NEXT: vsext.vf2 v8, v9
2136 ; RV32ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
2137 ; RV32ZVE32F: # %bb.0:
2138 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
2139 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
2140 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2141 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
2142 ; RV32ZVE32F-NEXT: srai a1, a1, 31
2143 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9
2144 ; RV32ZVE32F-NEXT: srai a2, a2, 31
2145 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2146 ; RV32ZVE32F-NEXT: vse32.v v9, (a0)
2147 ; RV32ZVE32F-NEXT: addi a3, a0, 8
2148 ; RV32ZVE32F-NEXT: vse32.v v8, (a3)
2149 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
2150 ; RV32ZVE32F-NEXT: sw a1, 12(a0)
2151 ; RV32ZVE32F-NEXT: ret
2153 ; RV64ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
2154 ; RV64ZVE32F: # %bb.0:
2155 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2156 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
2157 ; RV64ZVE32F-NEXT: andi a3, a2, 1
2158 ; RV64ZVE32F-NEXT: beqz a3, .LBB29_2
2159 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2160 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2161 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2162 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2163 ; RV64ZVE32F-NEXT: .LBB29_2: # %else
2164 ; RV64ZVE32F-NEXT: andi a2, a2, 2
2165 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_4
2166 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
2167 ; RV64ZVE32F-NEXT: lw a0, 0(a1)
2168 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2169 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
2170 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
2171 ; RV64ZVE32F-NEXT: .LBB29_4: # %else2
2172 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2173 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
2174 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2175 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2176 ; RV64ZVE32F-NEXT: ret
2177 %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
2178 %ev = sext <2 x i32> %v to <2 x i64>
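; Zero-extending variant: RV32ZVE32F stores zeroes for the two high words, and
; RV64ZVE32F clears the upper 32 bits of each lane with an slli/srli pair.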
2182 define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
2183 ; RV32V-LABEL: mgather_v2i32_zextload_v2i64:
2185 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2186 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
2187 ; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
2188 ; RV32V-NEXT: vzext.vf2 v8, v9
2191 ; RV64V-LABEL: mgather_v2i32_zextload_v2i64:
2193 ; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
2194 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
2195 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
2196 ; RV64V-NEXT: vzext.vf2 v8, v9
2199 ; RV32ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
2200 ; RV32ZVE32F: # %bb.0:
2201 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
2202 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
2203 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2204 ; RV32ZVE32F-NEXT: sw zero, 12(a0)
2205 ; RV32ZVE32F-NEXT: sw zero, 4(a0)
2206 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2207 ; RV32ZVE32F-NEXT: vse32.v v9, (a0)
2208 ; RV32ZVE32F-NEXT: addi a0, a0, 8
2209 ; RV32ZVE32F-NEXT: vse32.v v8, (a0)
2210 ; RV32ZVE32F-NEXT: ret
2212 ; RV64ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
2213 ; RV64ZVE32F: # %bb.0:
2214 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2215 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
2216 ; RV64ZVE32F-NEXT: andi a3, a2, 1
2217 ; RV64ZVE32F-NEXT: beqz a3, .LBB30_2
2218 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2219 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2220 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2221 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2222 ; RV64ZVE32F-NEXT: .LBB30_2: # %else
2223 ; RV64ZVE32F-NEXT: andi a2, a2, 2
2224 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_4
2225 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
2226 ; RV64ZVE32F-NEXT: lw a0, 0(a1)
2227 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
2228 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
2229 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
2230 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2
2231 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
2232 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
2233 ; RV64ZVE32F-NEXT: slli a0, a0, 32
2234 ; RV64ZVE32F-NEXT: srli a0, a0, 32
2235 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2236 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2237 ; RV64ZVE32F-NEXT: slli a1, a1, 32
2238 ; RV64ZVE32F-NEXT: srli a1, a1, 32
2239 ; RV64ZVE32F-NEXT: ret
2240 %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
2241 %ev = zext <2 x i32> %v to <2 x i64>
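; Four-element i32 gather. On RV64V the <4 x i64> pointer vector occupies
; v8-v9, so the gathered result is produced in v10 and copied back to v8.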
2245 declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
2247 define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthru) {
2248 ; RV32-LABEL: mgather_v4i32:
2250 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
2251 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
2252 ; RV32-NEXT: vmv.v.v v8, v9
2255 ; RV64V-LABEL: mgather_v4i32:
2257 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, mu
2258 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
2259 ; RV64V-NEXT: vmv.v.v v8, v10
2262 ; RV64ZVE32F-LABEL: mgather_v4i32:
2263 ; RV64ZVE32F: # %bb.0:
2264 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2265 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2266 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2267 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_5
2268 ; RV64ZVE32F-NEXT: # %bb.1: # %else
2269 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2270 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_6
2271 ; RV64ZVE32F-NEXT: .LBB31_2: # %else2
2272 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2273 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_7
2274 ; RV64ZVE32F-NEXT: .LBB31_3: # %else5
2275 ; RV64ZVE32F-NEXT: andi a1, a1, 8
2276 ; RV64ZVE32F-NEXT: bnez a1, .LBB31_8
2277 ; RV64ZVE32F-NEXT: .LBB31_4: # %else8
2278 ; RV64ZVE32F-NEXT: ret
2279 ; RV64ZVE32F-NEXT: .LBB31_5: # %cond.load
2280 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
2281 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2282 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
2283 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2284 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2285 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_2
2286 ; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1
2287 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
2288 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2289 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2290 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2291 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
2292 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2293 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_3
2294 ; RV64ZVE32F-NEXT: .LBB31_7: # %cond.load4
2295 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
2296 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2297 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2298 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2299 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
2300 ; RV64ZVE32F-NEXT: andi a1, a1, 8
2301 ; RV64ZVE32F-NEXT: beqz a1, .LBB31_4
2302 ; RV64ZVE32F-NEXT: .LBB31_8: # %cond.load7
2303 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
2304 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2305 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2306 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
2307 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
2308 ; RV64ZVE32F-NEXT: ret
2309 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> %passthru)
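; All-ones mask: the vector lowerings drop the mask operand and use an unmasked
; vluxei. RV64ZVE32F still materializes the mask with vmset.m, and the branch
; guarding the first lane degenerates into an always-taken beqz zero.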
2313 define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
2314 ; RV32-LABEL: mgather_truemask_v4i32:
2316 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2317 ; RV32-NEXT: vluxei32.v v8, (zero), v8
2320 ; RV64V-LABEL: mgather_truemask_v4i32:
2322 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2323 ; RV64V-NEXT: vluxei64.v v10, (zero), v8
2324 ; RV64V-NEXT: vmv.v.v v8, v10
2327 ; RV64ZVE32F-LABEL: mgather_truemask_v4i32:
2328 ; RV64ZVE32F: # %bb.0:
2329 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
2330 ; RV64ZVE32F-NEXT: vmset.m v9
2331 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
2332 ; RV64ZVE32F-NEXT: beqz zero, .LBB32_5
2333 ; RV64ZVE32F-NEXT: # %bb.1: # %else
2334 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2335 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_6
2336 ; RV64ZVE32F-NEXT: .LBB32_2: # %else2
2337 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2338 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_7
2339 ; RV64ZVE32F-NEXT: .LBB32_3: # %else5
2340 ; RV64ZVE32F-NEXT: andi a1, a1, 8
2341 ; RV64ZVE32F-NEXT: bnez a1, .LBB32_8
2342 ; RV64ZVE32F-NEXT: .LBB32_4: # %else8
2343 ; RV64ZVE32F-NEXT: ret
2344 ; RV64ZVE32F-NEXT: .LBB32_5: # %cond.load
2345 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
2346 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2347 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma
2348 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2349 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2350 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_2
2351 ; RV64ZVE32F-NEXT: .LBB32_6: # %cond.load1
2352 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
2353 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2354 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2355 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2356 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2357 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
2358 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2359 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_3
2360 ; RV64ZVE32F-NEXT: .LBB32_7: # %cond.load4
2361 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
2362 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2363 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2364 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2365 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
2366 ; RV64ZVE32F-NEXT: andi a1, a1, 8
2367 ; RV64ZVE32F-NEXT: beqz a1, .LBB32_4
2368 ; RV64ZVE32F-NEXT: .LBB32_8: # %cond.load7
2369 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
2370 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2371 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2372 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
2373 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
2374 ; RV64ZVE32F-NEXT: ret
2375 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
2376 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
2377 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue, <4 x i32> %passthru)
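; All-zeros mask: the gather folds away entirely and the passthru operand is
; returned as-is.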
2381 define <4 x i32> @mgather_falsemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
2382 ; RV32-LABEL: mgather_falsemask_v4i32:
2384 ; RV32-NEXT: vmv1r.v v8, v9
2387 ; RV64V-LABEL: mgather_falsemask_v4i32:
2389 ; RV64V-NEXT: vmv1r.v v8, v10
2392 ; RV64ZVE32F-LABEL: mgather_falsemask_v4i32:
2393 ; RV64ZVE32F: # %bb.0:
2394 ; RV64ZVE32F-NEXT: ret
2395 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x i32> %passthru)
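; Eight-element i32 gather. The scalarized RV64ZVE32F expansion inserts the
; first four lanes at LMUL=1 and switches to LMUL=2 once it slides past
; element 3 of the <8 x i32> result.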
2399 declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
2401 define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthru) {
2402 ; RV32-LABEL: mgather_v8i32:
2404 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
2405 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
2406 ; RV32-NEXT: vmv.v.v v8, v10
2409 ; RV64V-LABEL: mgather_v8i32:
2411 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
2412 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
2413 ; RV64V-NEXT: vmv.v.v v8, v12
2416 ; RV64ZVE32F-LABEL: mgather_v8i32:
2417 ; RV64ZVE32F: # %bb.0:
2418 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2419 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2420 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2421 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_9
2422 ; RV64ZVE32F-NEXT: # %bb.1: # %else
2423 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2424 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_10
2425 ; RV64ZVE32F-NEXT: .LBB34_2: # %else2
2426 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2427 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_11
2428 ; RV64ZVE32F-NEXT: .LBB34_3: # %else5
2429 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2430 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_12
2431 ; RV64ZVE32F-NEXT: .LBB34_4: # %else8
2432 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2433 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_13
2434 ; RV64ZVE32F-NEXT: .LBB34_5: # %else11
2435 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2436 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_14
2437 ; RV64ZVE32F-NEXT: .LBB34_6: # %else14
2438 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2439 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_15
2440 ; RV64ZVE32F-NEXT: .LBB34_7: # %else17
2441 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2442 ; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
2443 ; RV64ZVE32F-NEXT: .LBB34_8: # %else20
2444 ; RV64ZVE32F-NEXT: ret
2445 ; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load
2446 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
2447 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2448 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
2449 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2450 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2451 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_2
2452 ; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1
2453 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
2454 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2455 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2456 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2457 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
2458 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2459 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_3
2460 ; RV64ZVE32F-NEXT: .LBB34_11: # %cond.load4
2461 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
2462 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2463 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2464 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2465 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
2466 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2467 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_4
2468 ; RV64ZVE32F-NEXT: .LBB34_12: # %cond.load7
2469 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
2470 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2471 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
2472 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2473 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
2474 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2475 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_5
2476 ; RV64ZVE32F-NEXT: .LBB34_13: # %cond.load10
2477 ; RV64ZVE32F-NEXT: ld a2, 32(a0)
2478 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2479 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
2480 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2481 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
2482 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2483 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_6
2484 ; RV64ZVE32F-NEXT: .LBB34_14: # %cond.load13
2485 ; RV64ZVE32F-NEXT: ld a2, 40(a0)
2486 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2487 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
2488 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2489 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
2490 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2491 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_7
2492 ; RV64ZVE32F-NEXT: .LBB34_15: # %cond.load16
2493 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
2494 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2495 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
2496 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2497 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
2498 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2499 ; RV64ZVE32F-NEXT: beqz a1, .LBB34_8
2500 ; RV64ZVE32F-NEXT: .LBB34_16: # %cond.load19
2501 ; RV64ZVE32F-NEXT: ld a0, 56(a0)
2502 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2503 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2504 ; RV64ZVE32F-NEXT: vmv.s.x v10, a0
2505 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
2506 ; RV64ZVE32F-NEXT: ret
2507 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
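; Base plus i8 indices into i32 elements: the vector lowerings sign-extend the
; indices and shift them left by 2 with vsll.vi before the indexed load; the
; scalar path does the same per lane with slli a2, a2, 2.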
2511 define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2512 ; RV32-LABEL: mgather_baseidx_v8i8_v8i32:
2514 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
2515 ; RV32-NEXT: vsext.vf4 v12, v8
2516 ; RV32-NEXT: vsll.vi v8, v12, 2
2517 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
2518 ; RV32-NEXT: vmv.v.v v8, v10
2521 ; RV64V-LABEL: mgather_baseidx_v8i8_v8i32:
2523 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2524 ; RV64V-NEXT: vsext.vf8 v12, v8
2525 ; RV64V-NEXT: vsll.vi v12, v12, 2
2526 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
2527 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
2528 ; RV64V-NEXT: vmv.v.v v8, v10
2531 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i32:
2532 ; RV64ZVE32F: # %bb.0:
2533 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2534 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2535 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2536 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_2
2537 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2538 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2539 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2540 ; RV64ZVE32F-NEXT: add a2, a0, a2
2541 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2542 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
2543 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2544 ; RV64ZVE32F-NEXT: .LBB35_2: # %else
2545 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2546 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_4
2547 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
2548 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2549 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
2550 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2551 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2552 ; RV64ZVE32F-NEXT: add a2, a0, a2
2553 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2554 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2555 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2556 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2557 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
2558 ; RV64ZVE32F-NEXT: .LBB35_4: # %else2
2559 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2560 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
2561 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2562 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2563 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
2564 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_12
2565 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
2566 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2567 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_13
2568 ; RV64ZVE32F-NEXT: .LBB35_6: # %else8
2569 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2570 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
2571 ; RV64ZVE32F-NEXT: .LBB35_7: # %else11
2572 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2573 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
2574 ; RV64ZVE32F-NEXT: .LBB35_8: # %cond.load13
2575 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2576 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2577 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2578 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2579 ; RV64ZVE32F-NEXT: add a2, a0, a2
2580 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2581 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2582 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2583 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
2584 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
2585 ; RV64ZVE32F-NEXT: .LBB35_9: # %else14
2586 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2587 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2588 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
2589 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
2590 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
2591 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2592 ; RV64ZVE32F-NEXT: bnez a1, .LBB35_16
2593 ; RV64ZVE32F-NEXT: .LBB35_11: # %else20
2594 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
2595 ; RV64ZVE32F-NEXT: ret
2596 ; RV64ZVE32F-NEXT: .LBB35_12: # %cond.load4
2597 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2598 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2599 ; RV64ZVE32F-NEXT: add a2, a0, a2
2600 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2601 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2602 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2603 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2604 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
2605 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2606 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
2607 ; RV64ZVE32F-NEXT: .LBB35_13: # %cond.load7
2608 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2609 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2610 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2611 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2612 ; RV64ZVE32F-NEXT: add a2, a0, a2
2613 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2614 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2615 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2616 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
2617 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
2618 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2619 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
2620 ; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load10
2621 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2622 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2623 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2624 ; RV64ZVE32F-NEXT: add a2, a0, a2
2625 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2626 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
2627 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2628 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
2629 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2630 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
2631 ; RV64ZVE32F-NEXT: j .LBB35_9
2632 ; RV64ZVE32F-NEXT: .LBB35_15: # %cond.load16
2633 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2634 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2635 ; RV64ZVE32F-NEXT: add a2, a0, a2
2636 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2637 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2638 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2639 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
2640 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
2641 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2642 ; RV64ZVE32F-NEXT: beqz a1, .LBB35_11
2643 ; RV64ZVE32F-NEXT: .LBB35_16: # %cond.load19
2644 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2645 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2646 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2647 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2648 ; RV64ZVE32F-NEXT: add a0, a0, a1
2649 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2650 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2651 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2652 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2653 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
2654 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
2655 ; RV64ZVE32F-NEXT: ret
2656 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
2657 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
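; Explicit sign-extension of the i8 indices to i32; expected to lower the same
; way as the implicit-extension test above.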
2661 define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2662 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i32:
2664 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
2665 ; RV32-NEXT: vsext.vf4 v12, v8
2666 ; RV32-NEXT: vsll.vi v8, v12, 2
2667 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
2668 ; RV32-NEXT: vmv.v.v v8, v10
2671 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i32:
2673 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2674 ; RV64V-NEXT: vsext.vf8 v12, v8
2675 ; RV64V-NEXT: vsll.vi v12, v12, 2
2676 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
2677 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
2678 ; RV64V-NEXT: vmv.v.v v8, v10
2681 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i32:
2682 ; RV64ZVE32F: # %bb.0:
2683 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2684 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2685 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2686 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_2
2687 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2688 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2689 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2690 ; RV64ZVE32F-NEXT: add a2, a0, a2
2691 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2692 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
2693 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2694 ; RV64ZVE32F-NEXT: .LBB36_2: # %else
2695 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2696 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_4
2697 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
2698 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2699 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
2700 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2701 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2702 ; RV64ZVE32F-NEXT: add a2, a0, a2
2703 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2704 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2705 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2706 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2707 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
2708 ; RV64ZVE32F-NEXT: .LBB36_4: # %else2
2709 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2710 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
2711 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2712 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2713 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
2714 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_12
2715 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
2716 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2717 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_13
2718 ; RV64ZVE32F-NEXT: .LBB36_6: # %else8
2719 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2720 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_14
2721 ; RV64ZVE32F-NEXT: .LBB36_7: # %else11
2722 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2723 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_9
2724 ; RV64ZVE32F-NEXT: .LBB36_8: # %cond.load13
2725 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2726 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2727 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2728 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2729 ; RV64ZVE32F-NEXT: add a2, a0, a2
2730 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2731 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2732 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2733 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
2734 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
2735 ; RV64ZVE32F-NEXT: .LBB36_9: # %else14
2736 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2737 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2738 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
2739 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_15
2740 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
2741 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2742 ; RV64ZVE32F-NEXT: bnez a1, .LBB36_16
2743 ; RV64ZVE32F-NEXT: .LBB36_11: # %else20
2744 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
2745 ; RV64ZVE32F-NEXT: ret
2746 ; RV64ZVE32F-NEXT: .LBB36_12: # %cond.load4
2747 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2748 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2749 ; RV64ZVE32F-NEXT: add a2, a0, a2
2750 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2751 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2752 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2753 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2754 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
2755 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2756 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_6
2757 ; RV64ZVE32F-NEXT: .LBB36_13: # %cond.load7
2758 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2759 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2760 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2761 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2762 ; RV64ZVE32F-NEXT: add a2, a0, a2
2763 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2764 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2765 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2766 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
2767 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
2768 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2769 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_7
2770 ; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load10
2771 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2772 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2773 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2774 ; RV64ZVE32F-NEXT: add a2, a0, a2
2775 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2776 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
2777 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2778 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
2779 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2780 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_8
2781 ; RV64ZVE32F-NEXT: j .LBB36_9
2782 ; RV64ZVE32F-NEXT: .LBB36_15: # %cond.load16
2783 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2784 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2785 ; RV64ZVE32F-NEXT: add a2, a0, a2
2786 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2787 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2788 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2789 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
2790 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
2791 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2792 ; RV64ZVE32F-NEXT: beqz a1, .LBB36_11
2793 ; RV64ZVE32F-NEXT: .LBB36_16: # %cond.load19
2794 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2795 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2796 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2797 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2798 ; RV64ZVE32F-NEXT: add a0, a0, a1
2799 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2800 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2801 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2802 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2803 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
2804 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
2805 ; RV64ZVE32F-NEXT: ret
2806 %eidxs = sext <8 x i8> %idxs to <8 x i32>
2807 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2808 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
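; Zero-extended i8 indices again permit a 16-bit index vector (vzext.vf2 plus
; vluxei16), while the RV64ZVE32F expansion masks each index with 255 before
; scaling.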
2812 define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2813 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i32:
2815 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2816 ; RV32-NEXT: vzext.vf2 v9, v8
2817 ; RV32-NEXT: vsll.vi v8, v9, 2
2818 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
2819 ; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t
2820 ; RV32-NEXT: vmv.v.v v8, v10
2823 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i32:
2825 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2826 ; RV64V-NEXT: vzext.vf2 v9, v8
2827 ; RV64V-NEXT: vsll.vi v8, v9, 2
2828 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
2829 ; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t
2830 ; RV64V-NEXT: vmv.v.v v8, v10
2833 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i32:
2834 ; RV64ZVE32F: # %bb.0:
2835 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2836 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2837 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2838 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_2
2839 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2840 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2841 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2842 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2843 ; RV64ZVE32F-NEXT: add a2, a0, a2
2844 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2845 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
2846 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
2847 ; RV64ZVE32F-NEXT: .LBB37_2: # %else
2848 ; RV64ZVE32F-NEXT: andi a2, a1, 2
2849 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_4
2850 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
2851 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2852 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
2853 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2854 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2855 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2856 ; RV64ZVE32F-NEXT: add a2, a0, a2
2857 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2858 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2859 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
2860 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
2861 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
2862 ; RV64ZVE32F-NEXT: .LBB37_4: # %else2
2863 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
2864 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
2865 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2866 ; RV64ZVE32F-NEXT: andi a2, a1, 4
2867 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
2868 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_12
2869 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
2870 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2871 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_13
2872 ; RV64ZVE32F-NEXT: .LBB37_6: # %else8
2873 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2874 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_14
2875 ; RV64ZVE32F-NEXT: .LBB37_7: # %else11
2876 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2877 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_9
2878 ; RV64ZVE32F-NEXT: .LBB37_8: # %cond.load13
2879 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2880 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
2881 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2882 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2883 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2884 ; RV64ZVE32F-NEXT: add a2, a0, a2
2885 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2886 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2887 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2888 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
2889 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
2890 ; RV64ZVE32F-NEXT: .LBB37_9: # %else14
2891 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
2892 ; RV64ZVE32F-NEXT: andi a2, a1, 64
2893 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
2894 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_15
2895 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
2896 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2897 ; RV64ZVE32F-NEXT: bnez a1, .LBB37_16
2898 ; RV64ZVE32F-NEXT: .LBB37_11: # %else20
2899 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
2900 ; RV64ZVE32F-NEXT: ret
2901 ; RV64ZVE32F-NEXT: .LBB37_12: # %cond.load4
2902 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2903 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2904 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2905 ; RV64ZVE32F-NEXT: add a2, a0, a2
2906 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2907 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2908 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2909 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
2910 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
2911 ; RV64ZVE32F-NEXT: andi a2, a1, 8
2912 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_6
2913 ; RV64ZVE32F-NEXT: .LBB37_13: # %cond.load7
2914 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2915 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2916 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2917 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2918 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2919 ; RV64ZVE32F-NEXT: add a2, a0, a2
2920 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2921 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2922 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2923 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
2924 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
2925 ; RV64ZVE32F-NEXT: andi a2, a1, 16
2926 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_7
2927 ; RV64ZVE32F-NEXT: .LBB37_14: # %cond.load10
2928 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2929 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
2930 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2931 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2932 ; RV64ZVE32F-NEXT: add a2, a0, a2
2933 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2934 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
2935 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
2936 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
2937 ; RV64ZVE32F-NEXT: andi a2, a1, 32
2938 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_8
2939 ; RV64ZVE32F-NEXT: j .LBB37_9
2940 ; RV64ZVE32F-NEXT: .LBB37_15: # %cond.load16
2941 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
2942 ; RV64ZVE32F-NEXT: andi a2, a2, 255
2943 ; RV64ZVE32F-NEXT: slli a2, a2, 2
2944 ; RV64ZVE32F-NEXT: add a2, a0, a2
2945 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
2946 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2947 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
2948 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
2949 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
2950 ; RV64ZVE32F-NEXT: andi a1, a1, -128
2951 ; RV64ZVE32F-NEXT: beqz a1, .LBB37_11
2952 ; RV64ZVE32F-NEXT: .LBB37_16: # %cond.load19
2953 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2954 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
2955 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
2956 ; RV64ZVE32F-NEXT: andi a1, a1, 255
2957 ; RV64ZVE32F-NEXT: slli a1, a1, 2
2958 ; RV64ZVE32F-NEXT: add a0, a0, a1
2959 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
2960 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2961 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
2962 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2963 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
2964 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
2965 ; RV64ZVE32F-NEXT: ret
2966 %eidxs = zext <8 x i8> %idxs to <8 x i32>
2967 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
2968 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
  ret <8 x i32> %v
}
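; Gather of <8 x i32> from a common base with i16 indices (the GEP sign-extends them to the pointer width).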
2972 define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
2973 ; RV32-LABEL: mgather_baseidx_v8i16_v8i32:
2975 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
2976 ; RV32-NEXT: vsext.vf2 v12, v8
2977 ; RV32-NEXT: vsll.vi v8, v12, 2
2978 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
2979 ; RV32-NEXT: vmv.v.v v8, v10
2982 ; RV64V-LABEL: mgather_baseidx_v8i16_v8i32:
2984 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
2985 ; RV64V-NEXT: vsext.vf4 v12, v8
2986 ; RV64V-NEXT: vsll.vi v12, v12, 2
2987 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
2988 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
2989 ; RV64V-NEXT: vmv.v.v v8, v10
2992 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i32:
2993 ; RV64ZVE32F: # %bb.0:
2994 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
2995 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
2996 ; RV64ZVE32F-NEXT: andi a2, a1, 1
2997 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_2
2998 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
2999 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3000 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3001 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3002 ; RV64ZVE32F-NEXT: add a2, a0, a2
3003 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3004 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
3005 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
3006 ; RV64ZVE32F-NEXT: .LBB38_2: # %else
3007 ; RV64ZVE32F-NEXT: andi a2, a1, 2
3008 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_4
3009 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
3010 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3011 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3012 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
3013 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3014 ; RV64ZVE32F-NEXT: add a2, a0, a2
3015 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3016 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3017 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
3018 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
3019 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
3020 ; RV64ZVE32F-NEXT: .LBB38_4: # %else2
3021 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
3022 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
3023 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
3024 ; RV64ZVE32F-NEXT: andi a2, a1, 4
3025 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
3026 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_12
3027 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
3028 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3029 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_13
3030 ; RV64ZVE32F-NEXT: .LBB38_6: # %else8
3031 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3032 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_14
3033 ; RV64ZVE32F-NEXT: .LBB38_7: # %else11
3034 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3035 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_9
3036 ; RV64ZVE32F-NEXT: .LBB38_8: # %cond.load13
3037 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3038 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
3039 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3040 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3041 ; RV64ZVE32F-NEXT: add a2, a0, a2
3042 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3043 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3044 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3045 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
3046 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
3047 ; RV64ZVE32F-NEXT: .LBB38_9: # %else14
3048 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
3049 ; RV64ZVE32F-NEXT: andi a2, a1, 64
3050 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
3051 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_15
3052 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
3053 ; RV64ZVE32F-NEXT: andi a1, a1, -128
3054 ; RV64ZVE32F-NEXT: bnez a1, .LBB38_16
3055 ; RV64ZVE32F-NEXT: .LBB38_11: # %else20
3056 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
3057 ; RV64ZVE32F-NEXT: ret
3058 ; RV64ZVE32F-NEXT: .LBB38_12: # %cond.load4
3059 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3060 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3061 ; RV64ZVE32F-NEXT: add a2, a0, a2
3062 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3063 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3064 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3065 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
3066 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
3067 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3068 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_6
3069 ; RV64ZVE32F-NEXT: .LBB38_13: # %cond.load7
3070 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3071 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3072 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3073 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3074 ; RV64ZVE32F-NEXT: add a2, a0, a2
3075 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3076 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3077 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3078 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
3079 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
3080 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3081 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_7
3082 ; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load10
3083 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3084 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
3085 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3086 ; RV64ZVE32F-NEXT: add a2, a0, a2
3087 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3088 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
3089 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3090 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
3091 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3092 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_8
3093 ; RV64ZVE32F-NEXT: j .LBB38_9
3094 ; RV64ZVE32F-NEXT: .LBB38_15: # %cond.load16
3095 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3096 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3097 ; RV64ZVE32F-NEXT: add a2, a0, a2
3098 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3099 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3100 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3101 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
3102 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
3103 ; RV64ZVE32F-NEXT: andi a1, a1, -128
3104 ; RV64ZVE32F-NEXT: beqz a1, .LBB38_11
3105 ; RV64ZVE32F-NEXT: .LBB38_16: # %cond.load19
3106 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3107 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3108 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
3109 ; RV64ZVE32F-NEXT: slli a1, a1, 2
3110 ; RV64ZVE32F-NEXT: add a0, a0, a1
3111 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
3112 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3113 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
3114 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3115 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
3116 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
3117 ; RV64ZVE32F-NEXT: ret
3118 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
3119 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
  ret <8 x i32> %v
}
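; Gather of <8 x i32> from a common base with explicitly sign-extended i16 indices.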
3123 define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
3124 ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8i32:
3126 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
3127 ; RV32-NEXT: vsext.vf2 v12, v8
3128 ; RV32-NEXT: vsll.vi v8, v12, 2
3129 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
3130 ; RV32-NEXT: vmv.v.v v8, v10
3133 ; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i32:
3135 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3136 ; RV64V-NEXT: vsext.vf4 v12, v8
3137 ; RV64V-NEXT: vsll.vi v12, v12, 2
3138 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
3139 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
3140 ; RV64V-NEXT: vmv.v.v v8, v10
3143 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i32:
3144 ; RV64ZVE32F: # %bb.0:
3145 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3146 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
3147 ; RV64ZVE32F-NEXT: andi a2, a1, 1
3148 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_2
3149 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3150 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3151 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3152 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3153 ; RV64ZVE32F-NEXT: add a2, a0, a2
3154 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3155 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
3156 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
3157 ; RV64ZVE32F-NEXT: .LBB39_2: # %else
3158 ; RV64ZVE32F-NEXT: andi a2, a1, 2
3159 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_4
3160 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
3161 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3162 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3163 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
3164 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3165 ; RV64ZVE32F-NEXT: add a2, a0, a2
3166 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3167 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3168 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
3169 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
3170 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
3171 ; RV64ZVE32F-NEXT: .LBB39_4: # %else2
3172 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
3173 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
3174 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
3175 ; RV64ZVE32F-NEXT: andi a2, a1, 4
3176 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
3177 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_12
3178 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
3179 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3180 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_13
3181 ; RV64ZVE32F-NEXT: .LBB39_6: # %else8
3182 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3183 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_14
3184 ; RV64ZVE32F-NEXT: .LBB39_7: # %else11
3185 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3186 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_9
3187 ; RV64ZVE32F-NEXT: .LBB39_8: # %cond.load13
3188 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3189 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
3190 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3191 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3192 ; RV64ZVE32F-NEXT: add a2, a0, a2
3193 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3194 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3195 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3196 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
3197 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
3198 ; RV64ZVE32F-NEXT: .LBB39_9: # %else14
3199 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
3200 ; RV64ZVE32F-NEXT: andi a2, a1, 64
3201 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
3202 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_15
3203 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
3204 ; RV64ZVE32F-NEXT: andi a1, a1, -128
3205 ; RV64ZVE32F-NEXT: bnez a1, .LBB39_16
3206 ; RV64ZVE32F-NEXT: .LBB39_11: # %else20
3207 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
3208 ; RV64ZVE32F-NEXT: ret
3209 ; RV64ZVE32F-NEXT: .LBB39_12: # %cond.load4
3210 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3211 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3212 ; RV64ZVE32F-NEXT: add a2, a0, a2
3213 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3214 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3215 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3216 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
3217 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
3218 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3219 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_6
3220 ; RV64ZVE32F-NEXT: .LBB39_13: # %cond.load7
3221 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3222 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3223 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3224 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3225 ; RV64ZVE32F-NEXT: add a2, a0, a2
3226 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3227 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3228 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3229 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
3230 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
3231 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3232 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_7
3233 ; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load10
3234 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3235 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
3236 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3237 ; RV64ZVE32F-NEXT: add a2, a0, a2
3238 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3239 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
3240 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3241 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
3242 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3243 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_8
3244 ; RV64ZVE32F-NEXT: j .LBB39_9
3245 ; RV64ZVE32F-NEXT: .LBB39_15: # %cond.load16
3246 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3247 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3248 ; RV64ZVE32F-NEXT: add a2, a0, a2
3249 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3250 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3251 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3252 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
3253 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
3254 ; RV64ZVE32F-NEXT: andi a1, a1, -128
3255 ; RV64ZVE32F-NEXT: beqz a1, .LBB39_11
3256 ; RV64ZVE32F-NEXT: .LBB39_16: # %cond.load19
3257 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3258 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3259 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
3260 ; RV64ZVE32F-NEXT: slli a1, a1, 2
3261 ; RV64ZVE32F-NEXT: add a0, a0, a1
3262 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
3263 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3264 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
3265 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3266 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
3267 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
3268 ; RV64ZVE32F-NEXT: ret
3269 %eidxs = sext <8 x i16> %idxs to <8 x i32>
3270 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
3271 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
  ret <8 x i32> %v
}
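; Gather of <8 x i32> from a common base with zero-extended i16 indices; the RV64ZVE32F expansion masks each index with 0xffff before scaling it.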
3275 define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
3276 ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8i32:
3278 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
3279 ; RV32-NEXT: vzext.vf2 v12, v8
3280 ; RV32-NEXT: vsll.vi v8, v12, 2
3281 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
3282 ; RV32-NEXT: vmv.v.v v8, v10
3285 ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i32:
3287 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
3288 ; RV64V-NEXT: vzext.vf2 v12, v8
3289 ; RV64V-NEXT: vsll.vi v8, v12, 2
3290 ; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t
3291 ; RV64V-NEXT: vmv.v.v v8, v10
3294 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i32:
3295 ; RV64ZVE32F: # %bb.0:
3296 ; RV64ZVE32F-NEXT: lui a1, 16
3297 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3298 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
3299 ; RV64ZVE32F-NEXT: andi a3, a2, 1
3300 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
3301 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_2
3302 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3303 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3304 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
3305 ; RV64ZVE32F-NEXT: and a3, a3, a1
3306 ; RV64ZVE32F-NEXT: slli a3, a3, 2
3307 ; RV64ZVE32F-NEXT: add a3, a0, a3
3308 ; RV64ZVE32F-NEXT: lw a3, 0(a3)
3309 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
3310 ; RV64ZVE32F-NEXT: vmv.s.x v10, a3
3311 ; RV64ZVE32F-NEXT: .LBB40_2: # %else
3312 ; RV64ZVE32F-NEXT: andi a3, a2, 2
3313 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_4
3314 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
3315 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3316 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3317 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
3318 ; RV64ZVE32F-NEXT: and a3, a3, a1
3319 ; RV64ZVE32F-NEXT: slli a3, a3, 2
3320 ; RV64ZVE32F-NEXT: add a3, a0, a3
3321 ; RV64ZVE32F-NEXT: lw a3, 0(a3)
3322 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3323 ; RV64ZVE32F-NEXT: vmv.s.x v9, a3
3324 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
3325 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
3326 ; RV64ZVE32F-NEXT: .LBB40_4: # %else2
3327 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
3328 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
3329 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
3330 ; RV64ZVE32F-NEXT: andi a3, a2, 4
3331 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
3332 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_12
3333 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
3334 ; RV64ZVE32F-NEXT: andi a3, a2, 8
3335 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_13
3336 ; RV64ZVE32F-NEXT: .LBB40_6: # %else8
3337 ; RV64ZVE32F-NEXT: andi a3, a2, 16
3338 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_14
3339 ; RV64ZVE32F-NEXT: .LBB40_7: # %else11
3340 ; RV64ZVE32F-NEXT: andi a3, a2, 32
3341 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_9
3342 ; RV64ZVE32F-NEXT: .LBB40_8: # %cond.load13
3343 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3344 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
3345 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
3346 ; RV64ZVE32F-NEXT: and a3, a3, a1
3347 ; RV64ZVE32F-NEXT: slli a3, a3, 2
3348 ; RV64ZVE32F-NEXT: add a3, a0, a3
3349 ; RV64ZVE32F-NEXT: lw a3, 0(a3)
3350 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3351 ; RV64ZVE32F-NEXT: vmv.s.x v8, a3
3352 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
3353 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
3354 ; RV64ZVE32F-NEXT: .LBB40_9: # %else14
3355 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
3356 ; RV64ZVE32F-NEXT: andi a3, a2, 64
3357 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
3358 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_15
3359 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
3360 ; RV64ZVE32F-NEXT: andi a2, a2, -128
3361 ; RV64ZVE32F-NEXT: bnez a2, .LBB40_16
3362 ; RV64ZVE32F-NEXT: .LBB40_11: # %else20
3363 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
3364 ; RV64ZVE32F-NEXT: ret
3365 ; RV64ZVE32F-NEXT: .LBB40_12: # %cond.load4
3366 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
3367 ; RV64ZVE32F-NEXT: and a3, a3, a1
3368 ; RV64ZVE32F-NEXT: slli a3, a3, 2
3369 ; RV64ZVE32F-NEXT: add a3, a0, a3
3370 ; RV64ZVE32F-NEXT: lw a3, 0(a3)
3371 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3372 ; RV64ZVE32F-NEXT: vmv.s.x v12, a3
3373 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
3374 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
3375 ; RV64ZVE32F-NEXT: andi a3, a2, 8
3376 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_6
3377 ; RV64ZVE32F-NEXT: .LBB40_13: # %cond.load7
3378 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3379 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3380 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
3381 ; RV64ZVE32F-NEXT: and a3, a3, a1
3382 ; RV64ZVE32F-NEXT: slli a3, a3, 2
3383 ; RV64ZVE32F-NEXT: add a3, a0, a3
3384 ; RV64ZVE32F-NEXT: lw a3, 0(a3)
3385 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3386 ; RV64ZVE32F-NEXT: vmv.s.x v8, a3
3387 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
3388 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
3389 ; RV64ZVE32F-NEXT: andi a3, a2, 16
3390 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_7
3391 ; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load10
3392 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3393 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
3394 ; RV64ZVE32F-NEXT: and a3, a3, a1
3395 ; RV64ZVE32F-NEXT: slli a3, a3, 2
3396 ; RV64ZVE32F-NEXT: add a3, a0, a3
3397 ; RV64ZVE32F-NEXT: lw a3, 0(a3)
3398 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
3399 ; RV64ZVE32F-NEXT: vmv.s.x v8, a3
3400 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
3401 ; RV64ZVE32F-NEXT: andi a3, a2, 32
3402 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_8
3403 ; RV64ZVE32F-NEXT: j .LBB40_9
3404 ; RV64ZVE32F-NEXT: .LBB40_15: # %cond.load16
3405 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
3406 ; RV64ZVE32F-NEXT: and a3, a3, a1
3407 ; RV64ZVE32F-NEXT: slli a3, a3, 2
3408 ; RV64ZVE32F-NEXT: add a3, a0, a3
3409 ; RV64ZVE32F-NEXT: lw a3, 0(a3)
3410 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3411 ; RV64ZVE32F-NEXT: vmv.s.x v12, a3
3412 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
3413 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
3414 ; RV64ZVE32F-NEXT: andi a2, a2, -128
3415 ; RV64ZVE32F-NEXT: beqz a2, .LBB40_11
3416 ; RV64ZVE32F-NEXT: .LBB40_16: # %cond.load19
3417 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
3418 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3419 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3420 ; RV64ZVE32F-NEXT: and a1, a2, a1
3421 ; RV64ZVE32F-NEXT: slli a1, a1, 2
3422 ; RV64ZVE32F-NEXT: add a0, a0, a1
3423 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
3424 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3425 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
3426 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3427 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
3428 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
3429 ; RV64ZVE32F-NEXT: ret
3430 %eidxs = zext <8 x i16> %idxs to <8 x i32>
3431 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
3432 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
  ret <8 x i32> %v
}
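; Gather of <8 x i32> from a common base with i32 indices; RV64V sign-extends them to 64 bits to form the offsets.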
3436 define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
3437 ; RV32-LABEL: mgather_baseidx_v8i32:
3439 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
3440 ; RV32-NEXT: vsll.vi v8, v8, 2
3441 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
3442 ; RV32-NEXT: vmv.v.v v8, v10
3445 ; RV64V-LABEL: mgather_baseidx_v8i32:
3447 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
3448 ; RV64V-NEXT: vsext.vf2 v12, v8
3449 ; RV64V-NEXT: vsll.vi v12, v12, 2
3450 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
3451 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
3452 ; RV64V-NEXT: vmv.v.v v8, v10
3455 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i32:
3456 ; RV64ZVE32F: # %bb.0:
3457 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3458 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
3459 ; RV64ZVE32F-NEXT: andi a2, a1, 1
3460 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_2
3461 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3462 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
3463 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3464 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3465 ; RV64ZVE32F-NEXT: add a2, a0, a2
3466 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3467 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
3468 ; RV64ZVE32F-NEXT: .LBB41_2: # %else
3469 ; RV64ZVE32F-NEXT: andi a2, a1, 2
3470 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_4
3471 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
3472 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3473 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
3474 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
3475 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3476 ; RV64ZVE32F-NEXT: add a2, a0, a2
3477 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3478 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3479 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
3480 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
3481 ; RV64ZVE32F-NEXT: .LBB41_4: # %else2
3482 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
3483 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
3484 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
3485 ; RV64ZVE32F-NEXT: andi a2, a1, 4
3486 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
3487 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_12
3488 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
3489 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3490 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_13
3491 ; RV64ZVE32F-NEXT: .LBB41_6: # %else8
3492 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3493 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_14
3494 ; RV64ZVE32F-NEXT: .LBB41_7: # %else11
3495 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3496 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_9
3497 ; RV64ZVE32F-NEXT: .LBB41_8: # %cond.load13
3498 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3499 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
3500 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3501 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3502 ; RV64ZVE32F-NEXT: add a2, a0, a2
3503 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3504 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3505 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
3506 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
3507 ; RV64ZVE32F-NEXT: .LBB41_9: # %else14
3508 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
3509 ; RV64ZVE32F-NEXT: andi a2, a1, 64
3510 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2
3511 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_15
3512 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
3513 ; RV64ZVE32F-NEXT: andi a1, a1, -128
3514 ; RV64ZVE32F-NEXT: bnez a1, .LBB41_16
3515 ; RV64ZVE32F-NEXT: .LBB41_11: # %else20
3516 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
3517 ; RV64ZVE32F-NEXT: ret
3518 ; RV64ZVE32F-NEXT: .LBB41_12: # %cond.load4
3519 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3520 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3521 ; RV64ZVE32F-NEXT: add a2, a0, a2
3522 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3523 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
3524 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
3525 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
3526 ; RV64ZVE32F-NEXT: andi a2, a1, 8
3527 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_6
3528 ; RV64ZVE32F-NEXT: .LBB41_13: # %cond.load7
3529 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3530 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3531 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3532 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3533 ; RV64ZVE32F-NEXT: add a2, a0, a2
3534 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3535 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3536 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
3537 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
3538 ; RV64ZVE32F-NEXT: andi a2, a1, 16
3539 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_7
3540 ; RV64ZVE32F-NEXT: .LBB41_14: # %cond.load10
3541 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
3542 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
3543 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3544 ; RV64ZVE32F-NEXT: add a2, a0, a2
3545 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3546 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
3547 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
3548 ; RV64ZVE32F-NEXT: andi a2, a1, 32
3549 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_8
3550 ; RV64ZVE32F-NEXT: j .LBB41_9
3551 ; RV64ZVE32F-NEXT: .LBB41_15: # %cond.load16
3552 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
3553 ; RV64ZVE32F-NEXT: slli a2, a2, 2
3554 ; RV64ZVE32F-NEXT: add a2, a0, a2
3555 ; RV64ZVE32F-NEXT: lw a2, 0(a2)
3556 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
3557 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
3558 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
3559 ; RV64ZVE32F-NEXT: andi a1, a1, -128
3560 ; RV64ZVE32F-NEXT: beqz a1, .LBB41_11
3561 ; RV64ZVE32F-NEXT: .LBB41_16: # %cond.load19
3562 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3563 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3564 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
3565 ; RV64ZVE32F-NEXT: slli a1, a1, 2
3566 ; RV64ZVE32F-NEXT: add a0, a0, a1
3567 ; RV64ZVE32F-NEXT: lw a0, 0(a0)
3568 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
3569 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
3570 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
3571 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
3572 ; RV64ZVE32F-NEXT: ret
3573 %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
3574 %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
  ret <8 x i32> %v
}
3578 declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>)
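; i64 gathers follow. The ZVE32F configurations (ELEN=32) cannot hold i64 elements in vector registers, so they fall back to scalar loads.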
3580 define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthru) {
3581 ; RV32V-LABEL: mgather_v1i64:
3583 ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
3584 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
3585 ; RV32V-NEXT: vmv.v.v v8, v9
3588 ; RV64V-LABEL: mgather_v1i64:
3590 ; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
3591 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
3592 ; RV64V-NEXT: vmv.v.v v8, v9
3595 ; RV32ZVE32F-LABEL: mgather_v1i64:
3596 ; RV32ZVE32F: # %bb.0:
3597 ; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
3598 ; RV32ZVE32F-NEXT: vfirst.m a2, v0
3599 ; RV32ZVE32F-NEXT: bnez a2, .LBB42_2
3600 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
3601 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3602 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
3603 ; RV32ZVE32F-NEXT: lw a1, 4(a0)
3604 ; RV32ZVE32F-NEXT: lw a0, 0(a0)
3605 ; RV32ZVE32F-NEXT: .LBB42_2: # %else
3606 ; RV32ZVE32F-NEXT: ret
3608 ; RV64ZVE32F-LABEL: mgather_v1i64:
3609 ; RV64ZVE32F: # %bb.0:
3610 ; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
3611 ; RV64ZVE32F-NEXT: vfirst.m a2, v0
3612 ; RV64ZVE32F-NEXT: bnez a2, .LBB42_2
3613 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3614 ; RV64ZVE32F-NEXT: ld a1, 0(a0)
3615 ; RV64ZVE32F-NEXT: .LBB42_2: # %else
3616 ; RV64ZVE32F-NEXT: mv a0, a1
3617 ; RV64ZVE32F-NEXT: ret
3618 %v = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x i64> %passthru)
  ret <1 x i64> %v
}
3622 declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
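; 2-element i64 gather; RV32ZVE32F loads each element as two 32-bit halves.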
3624 define <2 x i64> @mgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %passthru) {
3625 ; RV32V-LABEL: mgather_v2i64:
3627 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
3628 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
3629 ; RV32V-NEXT: vmv.v.v v8, v9
3632 ; RV64V-LABEL: mgather_v2i64:
3634 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
3635 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
3636 ; RV64V-NEXT: vmv.v.v v8, v9
3639 ; RV32ZVE32F-LABEL: mgather_v2i64:
3640 ; RV32ZVE32F: # %bb.0:
3641 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3642 ; RV32ZVE32F-NEXT: vmv.x.s a4, v0
3643 ; RV32ZVE32F-NEXT: andi a2, a4, 1
3644 ; RV32ZVE32F-NEXT: beqz a2, .LBB43_3
3645 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
3646 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3647 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
3648 ; RV32ZVE32F-NEXT: lw a2, 4(a3)
3649 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
3650 ; RV32ZVE32F-NEXT: andi a4, a4, 2
3651 ; RV32ZVE32F-NEXT: bnez a4, .LBB43_4
3652 ; RV32ZVE32F-NEXT: .LBB43_2:
3653 ; RV32ZVE32F-NEXT: lw a4, 12(a1)
3654 ; RV32ZVE32F-NEXT: lw a1, 8(a1)
3655 ; RV32ZVE32F-NEXT: j .LBB43_5
3656 ; RV32ZVE32F-NEXT: .LBB43_3:
3657 ; RV32ZVE32F-NEXT: lw a2, 4(a1)
3658 ; RV32ZVE32F-NEXT: lw a3, 0(a1)
3659 ; RV32ZVE32F-NEXT: andi a4, a4, 2
3660 ; RV32ZVE32F-NEXT: beqz a4, .LBB43_2
3661 ; RV32ZVE32F-NEXT: .LBB43_4: # %cond.load1
3662 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3663 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
3664 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
3665 ; RV32ZVE32F-NEXT: lw a4, 4(a1)
3666 ; RV32ZVE32F-NEXT: lw a1, 0(a1)
3667 ; RV32ZVE32F-NEXT: .LBB43_5: # %else2
3668 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
3669 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
3670 ; RV32ZVE32F-NEXT: sw a1, 8(a0)
3671 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
3672 ; RV32ZVE32F-NEXT: ret
3674 ; RV64ZVE32F-LABEL: mgather_v2i64:
3675 ; RV64ZVE32F: # %bb.0:
3676 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3677 ; RV64ZVE32F-NEXT: vmv.x.s a4, v0
3678 ; RV64ZVE32F-NEXT: andi a5, a4, 1
3679 ; RV64ZVE32F-NEXT: beqz a5, .LBB43_2
3680 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3681 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
3682 ; RV64ZVE32F-NEXT: .LBB43_2: # %else
3683 ; RV64ZVE32F-NEXT: andi a4, a4, 2
3684 ; RV64ZVE32F-NEXT: beqz a4, .LBB43_4
3685 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
3686 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
3687 ; RV64ZVE32F-NEXT: .LBB43_4: # %else2
3688 ; RV64ZVE32F-NEXT: mv a0, a2
3689 ; RV64ZVE32F-NEXT: mv a1, a3
3690 ; RV64ZVE32F-NEXT: ret
3691 %v = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x i64> %passthru)
  ret <2 x i64> %v
}
3695 declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
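; 4-element i64 gather; both ZVE32F configurations return the result indirectly through a0.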
3697 define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthru) {
3698 ; RV32V-LABEL: mgather_v4i64:
3700 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
3701 ; RV32V-NEXT: vluxei32.v v10, (zero), v8, v0.t
3702 ; RV32V-NEXT: vmv.v.v v8, v10
3705 ; RV64V-LABEL: mgather_v4i64:
3707 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
3708 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
3709 ; RV64V-NEXT: vmv.v.v v8, v10
3712 ; RV32ZVE32F-LABEL: mgather_v4i64:
3713 ; RV32ZVE32F: # %bb.0:
3714 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3715 ; RV32ZVE32F-NEXT: vmv.x.s a6, v0
3716 ; RV32ZVE32F-NEXT: andi a2, a6, 1
3717 ; RV32ZVE32F-NEXT: beqz a2, .LBB44_5
3718 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
3719 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3720 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
3721 ; RV32ZVE32F-NEXT: lw a2, 4(a3)
3722 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
3723 ; RV32ZVE32F-NEXT: andi a4, a6, 2
3724 ; RV32ZVE32F-NEXT: bnez a4, .LBB44_6
3725 ; RV32ZVE32F-NEXT: .LBB44_2:
3726 ; RV32ZVE32F-NEXT: lw a4, 12(a1)
3727 ; RV32ZVE32F-NEXT: lw a5, 8(a1)
3728 ; RV32ZVE32F-NEXT: andi a7, a6, 4
3729 ; RV32ZVE32F-NEXT: bnez a7, .LBB44_7
3730 ; RV32ZVE32F-NEXT: .LBB44_3:
3731 ; RV32ZVE32F-NEXT: lw a7, 20(a1)
3732 ; RV32ZVE32F-NEXT: lw t0, 16(a1)
3733 ; RV32ZVE32F-NEXT: andi a6, a6, 8
3734 ; RV32ZVE32F-NEXT: bnez a6, .LBB44_8
3735 ; RV32ZVE32F-NEXT: .LBB44_4:
3736 ; RV32ZVE32F-NEXT: lw a6, 28(a1)
3737 ; RV32ZVE32F-NEXT: lw a1, 24(a1)
3738 ; RV32ZVE32F-NEXT: j .LBB44_9
3739 ; RV32ZVE32F-NEXT: .LBB44_5:
3740 ; RV32ZVE32F-NEXT: lw a2, 4(a1)
3741 ; RV32ZVE32F-NEXT: lw a3, 0(a1)
3742 ; RV32ZVE32F-NEXT: andi a4, a6, 2
3743 ; RV32ZVE32F-NEXT: beqz a4, .LBB44_2
3744 ; RV32ZVE32F-NEXT: .LBB44_6: # %cond.load1
3745 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3746 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3747 ; RV32ZVE32F-NEXT: vmv.x.s a5, v9
3748 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
3749 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
3750 ; RV32ZVE32F-NEXT: andi a7, a6, 4
3751 ; RV32ZVE32F-NEXT: beqz a7, .LBB44_3
3752 ; RV32ZVE32F-NEXT: .LBB44_7: # %cond.load4
3753 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3754 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
3755 ; RV32ZVE32F-NEXT: vmv.x.s t0, v9
3756 ; RV32ZVE32F-NEXT: lw a7, 4(t0)
3757 ; RV32ZVE32F-NEXT: lw t0, 0(t0)
3758 ; RV32ZVE32F-NEXT: andi a6, a6, 8
3759 ; RV32ZVE32F-NEXT: beqz a6, .LBB44_4
3760 ; RV32ZVE32F-NEXT: .LBB44_8: # %cond.load7
3761 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3762 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
3763 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
3764 ; RV32ZVE32F-NEXT: lw a6, 4(a1)
3765 ; RV32ZVE32F-NEXT: lw a1, 0(a1)
3766 ; RV32ZVE32F-NEXT: .LBB44_9: # %else8
3767 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
3768 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
3769 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
3770 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
3771 ; RV32ZVE32F-NEXT: sw t0, 16(a0)
3772 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
3773 ; RV32ZVE32F-NEXT: sw a1, 24(a0)
3774 ; RV32ZVE32F-NEXT: sw a6, 28(a0)
3775 ; RV32ZVE32F-NEXT: ret
3777 ; RV64ZVE32F-LABEL: mgather_v4i64:
3778 ; RV64ZVE32F: # %bb.0:
3779 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
3780 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
3781 ; RV64ZVE32F-NEXT: andi a3, a5, 1
3782 ; RV64ZVE32F-NEXT: beqz a3, .LBB44_5
3783 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3784 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
3785 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
3786 ; RV64ZVE32F-NEXT: andi a4, a5, 2
3787 ; RV64ZVE32F-NEXT: bnez a4, .LBB44_6
3788 ; RV64ZVE32F-NEXT: .LBB44_2:
3789 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
3790 ; RV64ZVE32F-NEXT: andi a6, a5, 4
3791 ; RV64ZVE32F-NEXT: bnez a6, .LBB44_7
3792 ; RV64ZVE32F-NEXT: .LBB44_3:
3793 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
3794 ; RV64ZVE32F-NEXT: andi a5, a5, 8
3795 ; RV64ZVE32F-NEXT: bnez a5, .LBB44_8
3796 ; RV64ZVE32F-NEXT: .LBB44_4:
3797 ; RV64ZVE32F-NEXT: ld a1, 24(a2)
3798 ; RV64ZVE32F-NEXT: j .LBB44_9
3799 ; RV64ZVE32F-NEXT: .LBB44_5:
3800 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
3801 ; RV64ZVE32F-NEXT: andi a4, a5, 2
3802 ; RV64ZVE32F-NEXT: beqz a4, .LBB44_2
3803 ; RV64ZVE32F-NEXT: .LBB44_6: # %cond.load1
3804 ; RV64ZVE32F-NEXT: ld a4, 8(a1)
3805 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
3806 ; RV64ZVE32F-NEXT: andi a6, a5, 4
3807 ; RV64ZVE32F-NEXT: beqz a6, .LBB44_3
3808 ; RV64ZVE32F-NEXT: .LBB44_7: # %cond.load4
3809 ; RV64ZVE32F-NEXT: ld a6, 16(a1)
3810 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
3811 ; RV64ZVE32F-NEXT: andi a5, a5, 8
3812 ; RV64ZVE32F-NEXT: beqz a5, .LBB44_4
3813 ; RV64ZVE32F-NEXT: .LBB44_8: # %cond.load7
3814 ; RV64ZVE32F-NEXT: ld a1, 24(a1)
3815 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3816 ; RV64ZVE32F-NEXT: .LBB44_9: # %else8
3817 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
3818 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
3819 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
3820 ; RV64ZVE32F-NEXT: sd a1, 24(a0)
3821 ; RV64ZVE32F-NEXT: ret
3822 %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x i64> %passthru)
  ret <4 x i64> %v
}
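; An all-true mask lets the V configurations use an unmasked indexed load.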
3826 define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
3827 ; RV32V-LABEL: mgather_truemask_v4i64:
3829 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3830 ; RV32V-NEXT: vluxei32.v v10, (zero), v8
3831 ; RV32V-NEXT: vmv.v.v v8, v10
3834 ; RV64V-LABEL: mgather_truemask_v4i64:
3836 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
3837 ; RV64V-NEXT: vluxei64.v v8, (zero), v8
3840 ; RV32ZVE32F-LABEL: mgather_truemask_v4i64:
3841 ; RV32ZVE32F: # %bb.0:
3842 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
3843 ; RV32ZVE32F-NEXT: vmset.m v9
3844 ; RV32ZVE32F-NEXT: vmv.x.s a6, v9
3845 ; RV32ZVE32F-NEXT: bnez zero, .LBB45_5
3846 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
3847 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3848 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
3849 ; RV32ZVE32F-NEXT: lw a2, 4(a3)
3850 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
3851 ; RV32ZVE32F-NEXT: andi a4, a6, 2
3852 ; RV32ZVE32F-NEXT: bnez a4, .LBB45_6
3853 ; RV32ZVE32F-NEXT: .LBB45_2:
3854 ; RV32ZVE32F-NEXT: lw a4, 12(a1)
3855 ; RV32ZVE32F-NEXT: lw a5, 8(a1)
3856 ; RV32ZVE32F-NEXT: andi a7, a6, 4
3857 ; RV32ZVE32F-NEXT: bnez a7, .LBB45_7
3858 ; RV32ZVE32F-NEXT: .LBB45_3:
3859 ; RV32ZVE32F-NEXT: lw a7, 20(a1)
3860 ; RV32ZVE32F-NEXT: lw t0, 16(a1)
3861 ; RV32ZVE32F-NEXT: andi a6, a6, 8
3862 ; RV32ZVE32F-NEXT: bnez a6, .LBB45_8
3863 ; RV32ZVE32F-NEXT: .LBB45_4:
3864 ; RV32ZVE32F-NEXT: lw a6, 28(a1)
3865 ; RV32ZVE32F-NEXT: lw a1, 24(a1)
3866 ; RV32ZVE32F-NEXT: j .LBB45_9
3867 ; RV32ZVE32F-NEXT: .LBB45_5:
3868 ; RV32ZVE32F-NEXT: lw a2, 4(a1)
3869 ; RV32ZVE32F-NEXT: lw a3, 0(a1)
3870 ; RV32ZVE32F-NEXT: andi a4, a6, 2
3871 ; RV32ZVE32F-NEXT: beqz a4, .LBB45_2
3872 ; RV32ZVE32F-NEXT: .LBB45_6: # %cond.load1
3873 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3874 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
3875 ; RV32ZVE32F-NEXT: vmv.x.s a5, v9
3876 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
3877 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
3878 ; RV32ZVE32F-NEXT: andi a7, a6, 4
3879 ; RV32ZVE32F-NEXT: beqz a7, .LBB45_3
3880 ; RV32ZVE32F-NEXT: .LBB45_7: # %cond.load4
3881 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3882 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
3883 ; RV32ZVE32F-NEXT: vmv.x.s t0, v9
3884 ; RV32ZVE32F-NEXT: lw a7, 4(t0)
3885 ; RV32ZVE32F-NEXT: lw t0, 0(t0)
3886 ; RV32ZVE32F-NEXT: andi a6, a6, 8
3887 ; RV32ZVE32F-NEXT: beqz a6, .LBB45_4
3888 ; RV32ZVE32F-NEXT: .LBB45_8: # %cond.load7
3889 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
3890 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
3891 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
3892 ; RV32ZVE32F-NEXT: lw a6, 4(a1)
3893 ; RV32ZVE32F-NEXT: lw a1, 0(a1)
3894 ; RV32ZVE32F-NEXT: .LBB45_9: # %else8
3895 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
3896 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
3897 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
3898 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
3899 ; RV32ZVE32F-NEXT: sw t0, 16(a0)
3900 ; RV32ZVE32F-NEXT: sw a7, 20(a0)
3901 ; RV32ZVE32F-NEXT: sw a1, 24(a0)
3902 ; RV32ZVE32F-NEXT: sw a6, 28(a0)
3903 ; RV32ZVE32F-NEXT: ret
3905 ; RV64ZVE32F-LABEL: mgather_truemask_v4i64:
3906 ; RV64ZVE32F: # %bb.0:
3907 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
3908 ; RV64ZVE32F-NEXT: vmset.m v8
3909 ; RV64ZVE32F-NEXT: vmv.x.s a5, v8
3910 ; RV64ZVE32F-NEXT: bnez zero, .LBB45_5
3911 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
3912 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
3913 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
3914 ; RV64ZVE32F-NEXT: andi a4, a5, 2
3915 ; RV64ZVE32F-NEXT: bnez a4, .LBB45_6
3916 ; RV64ZVE32F-NEXT: .LBB45_2:
3917 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
3918 ; RV64ZVE32F-NEXT: andi a6, a5, 4
3919 ; RV64ZVE32F-NEXT: bnez a6, .LBB45_7
3920 ; RV64ZVE32F-NEXT: .LBB45_3:
3921 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
3922 ; RV64ZVE32F-NEXT: andi a5, a5, 8
3923 ; RV64ZVE32F-NEXT: bnez a5, .LBB45_8
3924 ; RV64ZVE32F-NEXT: .LBB45_4:
3925 ; RV64ZVE32F-NEXT: ld a1, 24(a2)
3926 ; RV64ZVE32F-NEXT: j .LBB45_9
3927 ; RV64ZVE32F-NEXT: .LBB45_5:
3928 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
3929 ; RV64ZVE32F-NEXT: andi a4, a5, 2
3930 ; RV64ZVE32F-NEXT: beqz a4, .LBB45_2
3931 ; RV64ZVE32F-NEXT: .LBB45_6: # %cond.load1
3932 ; RV64ZVE32F-NEXT: ld a4, 8(a1)
3933 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
3934 ; RV64ZVE32F-NEXT: andi a6, a5, 4
3935 ; RV64ZVE32F-NEXT: beqz a6, .LBB45_3
3936 ; RV64ZVE32F-NEXT: .LBB45_7: # %cond.load4
3937 ; RV64ZVE32F-NEXT: ld a6, 16(a1)
3938 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
3939 ; RV64ZVE32F-NEXT: andi a5, a5, 8
3940 ; RV64ZVE32F-NEXT: beqz a5, .LBB45_4
3941 ; RV64ZVE32F-NEXT: .LBB45_8: # %cond.load7
3942 ; RV64ZVE32F-NEXT: ld a1, 24(a1)
3943 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
3944 ; RV64ZVE32F-NEXT: .LBB45_9: # %else8
3945 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
3946 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
3947 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
3948 ; RV64ZVE32F-NEXT: sd a1, 24(a0)
3949 ; RV64ZVE32F-NEXT: ret
3950 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
3951 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
3952 %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue, <4 x i64> %passthru)
  ret <4 x i64> %v
}
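; An all-false mask reduces the gather to a copy of the passthru value.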
3956 define <4 x i64> @mgather_falsemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
3957 ; RV32V-LABEL: mgather_falsemask_v4i64:
3959 ; RV32V-NEXT: vmv2r.v v8, v10
3962 ; RV64V-LABEL: mgather_falsemask_v4i64:
3964 ; RV64V-NEXT: vmv2r.v v8, v10
3967 ; RV32ZVE32F-LABEL: mgather_falsemask_v4i64:
3968 ; RV32ZVE32F: # %bb.0:
3969 ; RV32ZVE32F-NEXT: lw a2, 0(a1)
3970 ; RV32ZVE32F-NEXT: lw a3, 4(a1)
3971 ; RV32ZVE32F-NEXT: lw a4, 8(a1)
3972 ; RV32ZVE32F-NEXT: lw a5, 12(a1)
3973 ; RV32ZVE32F-NEXT: lw a6, 28(a1)
3974 ; RV32ZVE32F-NEXT: lw a7, 24(a1)
3975 ; RV32ZVE32F-NEXT: lw t0, 20(a1)
3976 ; RV32ZVE32F-NEXT: lw a1, 16(a1)
3977 ; RV32ZVE32F-NEXT: sw a6, 28(a0)
3978 ; RV32ZVE32F-NEXT: sw a7, 24(a0)
3979 ; RV32ZVE32F-NEXT: sw t0, 20(a0)
3980 ; RV32ZVE32F-NEXT: sw a1, 16(a0)
3981 ; RV32ZVE32F-NEXT: sw a5, 12(a0)
3982 ; RV32ZVE32F-NEXT: sw a4, 8(a0)
3983 ; RV32ZVE32F-NEXT: sw a3, 4(a0)
3984 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
3985 ; RV32ZVE32F-NEXT: ret
3987 ; RV64ZVE32F-LABEL: mgather_falsemask_v4i64:
3988 ; RV64ZVE32F: # %bb.0:
3989 ; RV64ZVE32F-NEXT: ld a1, 24(a2)
3990 ; RV64ZVE32F-NEXT: ld a3, 16(a2)
3991 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
3992 ; RV64ZVE32F-NEXT: ld a2, 0(a2)
3993 ; RV64ZVE32F-NEXT: sd a1, 24(a0)
3994 ; RV64ZVE32F-NEXT: sd a3, 16(a0)
3995 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
3996 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
3997 ; RV64ZVE32F-NEXT: ret
3998 %v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x i64> %passthru)
  ret <4 x i64> %v
}
4002 declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
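; 8-element i64 gather; the RV32ZVE32F expansion has to spill s0 and s1 to hold all of the scalarized loads.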
4004 define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthru) {
4005 ; RV32V-LABEL: mgather_v8i64:
4007 ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
4008 ; RV32V-NEXT: vluxei32.v v12, (zero), v8, v0.t
4009 ; RV32V-NEXT: vmv.v.v v8, v12
4012 ; RV64V-LABEL: mgather_v8i64:
4014 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
4015 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
4016 ; RV64V-NEXT: vmv.v.v v8, v12
4019 ; RV32ZVE32F-LABEL: mgather_v8i64:
4020 ; RV32ZVE32F: # %bb.0:
4021 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4022 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4023 ; RV32ZVE32F-NEXT: andi a2, t0, 1
4024 ; RV32ZVE32F-NEXT: beqz a2, .LBB47_7
4025 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
4026 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4027 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
4028 ; RV32ZVE32F-NEXT: lw a2, 4(a3)
4029 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
4030 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4031 ; RV32ZVE32F-NEXT: bnez a4, .LBB47_8
4032 ; RV32ZVE32F-NEXT: .LBB47_2:
4033 ; RV32ZVE32F-NEXT: lw a4, 12(a1)
4034 ; RV32ZVE32F-NEXT: lw a5, 8(a1)
4035 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4036 ; RV32ZVE32F-NEXT: bnez a6, .LBB47_9
4037 ; RV32ZVE32F-NEXT: .LBB47_3:
4038 ; RV32ZVE32F-NEXT: lw a6, 20(a1)
4039 ; RV32ZVE32F-NEXT: lw a7, 16(a1)
4040 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4041 ; RV32ZVE32F-NEXT: bnez t1, .LBB47_10
4042 ; RV32ZVE32F-NEXT: .LBB47_4:
4043 ; RV32ZVE32F-NEXT: lw t1, 28(a1)
4044 ; RV32ZVE32F-NEXT: lw t2, 24(a1)
4045 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4046 ; RV32ZVE32F-NEXT: bnez t3, .LBB47_11
4047 ; RV32ZVE32F-NEXT: .LBB47_5:
4048 ; RV32ZVE32F-NEXT: lw t3, 36(a1)
4049 ; RV32ZVE32F-NEXT: lw t4, 32(a1)
4050 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4051 ; RV32ZVE32F-NEXT: bnez t5, .LBB47_12
4052 ; RV32ZVE32F-NEXT: .LBB47_6:
4053 ; RV32ZVE32F-NEXT: lw t5, 44(a1)
4054 ; RV32ZVE32F-NEXT: lw t6, 40(a1)
4055 ; RV32ZVE32F-NEXT: j .LBB47_13
4056 ; RV32ZVE32F-NEXT: .LBB47_7:
4057 ; RV32ZVE32F-NEXT: lw a2, 4(a1)
4058 ; RV32ZVE32F-NEXT: lw a3, 0(a1)
4059 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4060 ; RV32ZVE32F-NEXT: beqz a4, .LBB47_2
4061 ; RV32ZVE32F-NEXT: .LBB47_8: # %cond.load1
4062 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4063 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4064 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
4065 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
4066 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
4067 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4068 ; RV32ZVE32F-NEXT: beqz a6, .LBB47_3
4069 ; RV32ZVE32F-NEXT: .LBB47_9: # %cond.load4
4070 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4071 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4072 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
4073 ; RV32ZVE32F-NEXT: lw a6, 4(a7)
4074 ; RV32ZVE32F-NEXT: lw a7, 0(a7)
4075 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4076 ; RV32ZVE32F-NEXT: beqz t1, .LBB47_4
4077 ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.load7
4078 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4079 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4080 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
4081 ; RV32ZVE32F-NEXT: lw t1, 4(t2)
4082 ; RV32ZVE32F-NEXT: lw t2, 0(t2)
4083 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4084 ; RV32ZVE32F-NEXT: beqz t3, .LBB47_5
4085 ; RV32ZVE32F-NEXT: .LBB47_11: # %cond.load10
4086 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4087 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4088 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
4089 ; RV32ZVE32F-NEXT: lw t3, 4(t4)
4090 ; RV32ZVE32F-NEXT: lw t4, 0(t4)
4091 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4092 ; RV32ZVE32F-NEXT: beqz t5, .LBB47_6
4093 ; RV32ZVE32F-NEXT: .LBB47_12: # %cond.load13
4094 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4095 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4096 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
4097 ; RV32ZVE32F-NEXT: lw t5, 4(t6)
4098 ; RV32ZVE32F-NEXT: lw t6, 0(t6)
4099 ; RV32ZVE32F-NEXT: .LBB47_13: # %else14
4100 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4101 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4102 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4103 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4104 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4105 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4106 ; RV32ZVE32F-NEXT: andi s0, t0, 64
4107 ; RV32ZVE32F-NEXT: beqz s0, .LBB47_16
4108 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
4109 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4110 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4111 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
4112 ; RV32ZVE32F-NEXT: lw s0, 4(s1)
4113 ; RV32ZVE32F-NEXT: lw s1, 0(s1)
4114 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4115 ; RV32ZVE32F-NEXT: bnez t0, .LBB47_17
4116 ; RV32ZVE32F-NEXT: .LBB47_15:
4117 ; RV32ZVE32F-NEXT: lw t0, 60(a1)
4118 ; RV32ZVE32F-NEXT: lw a1, 56(a1)
4119 ; RV32ZVE32F-NEXT: j .LBB47_18
4120 ; RV32ZVE32F-NEXT: .LBB47_16:
4121 ; RV32ZVE32F-NEXT: lw s0, 52(a1)
4122 ; RV32ZVE32F-NEXT: lw s1, 48(a1)
4123 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4124 ; RV32ZVE32F-NEXT: beqz t0, .LBB47_15
4125 ; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load19
4126 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4127 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4128 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
4129 ; RV32ZVE32F-NEXT: lw t0, 4(a1)
4130 ; RV32ZVE32F-NEXT: lw a1, 0(a1)
4131 ; RV32ZVE32F-NEXT: .LBB47_18: # %else20
4132 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4133 ; RV32ZVE32F-NEXT: sw a2, 4(a0)
4134 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
4135 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
4136 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
4137 ; RV32ZVE32F-NEXT: sw a6, 20(a0)
4138 ; RV32ZVE32F-NEXT: sw t2, 24(a0)
4139 ; RV32ZVE32F-NEXT: sw t1, 28(a0)
4140 ; RV32ZVE32F-NEXT: sw t4, 32(a0)
4141 ; RV32ZVE32F-NEXT: sw t3, 36(a0)
4142 ; RV32ZVE32F-NEXT: sw t6, 40(a0)
4143 ; RV32ZVE32F-NEXT: sw t5, 44(a0)
4144 ; RV32ZVE32F-NEXT: sw s1, 48(a0)
4145 ; RV32ZVE32F-NEXT: sw s0, 52(a0)
4146 ; RV32ZVE32F-NEXT: sw a1, 56(a0)
4147 ; RV32ZVE32F-NEXT: sw t0, 60(a0)
4148 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4149 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4150 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4151 ; RV32ZVE32F-NEXT: ret
4153 ; RV64ZVE32F-LABEL: mgather_v8i64:
4154 ; RV64ZVE32F: # %bb.0:
4155 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4156 ; RV64ZVE32F-NEXT: vmv.x.s a6, v0
4157 ; RV64ZVE32F-NEXT: andi a3, a6, 1
4158 ; RV64ZVE32F-NEXT: beqz a3, .LBB47_9
4159 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
4160 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
4161 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
4162 ; RV64ZVE32F-NEXT: andi a4, a6, 2
4163 ; RV64ZVE32F-NEXT: bnez a4, .LBB47_10
4164 ; RV64ZVE32F-NEXT: .LBB47_2:
4165 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
4166 ; RV64ZVE32F-NEXT: andi a5, a6, 4
4167 ; RV64ZVE32F-NEXT: bnez a5, .LBB47_11
4168 ; RV64ZVE32F-NEXT: .LBB47_3:
4169 ; RV64ZVE32F-NEXT: ld a5, 16(a2)
4170 ; RV64ZVE32F-NEXT: andi a7, a6, 8
4171 ; RV64ZVE32F-NEXT: bnez a7, .LBB47_12
4172 ; RV64ZVE32F-NEXT: .LBB47_4:
4173 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
4174 ; RV64ZVE32F-NEXT: andi t0, a6, 16
4175 ; RV64ZVE32F-NEXT: bnez t0, .LBB47_13
4176 ; RV64ZVE32F-NEXT: .LBB47_5:
4177 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
4178 ; RV64ZVE32F-NEXT: andi t1, a6, 32
4179 ; RV64ZVE32F-NEXT: bnez t1, .LBB47_14
4180 ; RV64ZVE32F-NEXT: .LBB47_6:
4181 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
4182 ; RV64ZVE32F-NEXT: andi t2, a6, 64
4183 ; RV64ZVE32F-NEXT: bnez t2, .LBB47_15
4184 ; RV64ZVE32F-NEXT: .LBB47_7:
4185 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
4186 ; RV64ZVE32F-NEXT: andi a6, a6, -128
4187 ; RV64ZVE32F-NEXT: bnez a6, .LBB47_16
4188 ; RV64ZVE32F-NEXT: .LBB47_8:
4189 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
4190 ; RV64ZVE32F-NEXT: j .LBB47_17
4191 ; RV64ZVE32F-NEXT: .LBB47_9:
4192 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
4193 ; RV64ZVE32F-NEXT: andi a4, a6, 2
4194 ; RV64ZVE32F-NEXT: beqz a4, .LBB47_2
4195 ; RV64ZVE32F-NEXT: .LBB47_10: # %cond.load1
4196 ; RV64ZVE32F-NEXT: ld a4, 8(a1)
4197 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
4198 ; RV64ZVE32F-NEXT: andi a5, a6, 4
4199 ; RV64ZVE32F-NEXT: beqz a5, .LBB47_3
4200 ; RV64ZVE32F-NEXT: .LBB47_11: # %cond.load4
4201 ; RV64ZVE32F-NEXT: ld a5, 16(a1)
4202 ; RV64ZVE32F-NEXT: ld a5, 0(a5)
4203 ; RV64ZVE32F-NEXT: andi a7, a6, 8
4204 ; RV64ZVE32F-NEXT: beqz a7, .LBB47_4
4205 ; RV64ZVE32F-NEXT: .LBB47_12: # %cond.load7
4206 ; RV64ZVE32F-NEXT: ld a7, 24(a1)
4207 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
4208 ; RV64ZVE32F-NEXT: andi t0, a6, 16
4209 ; RV64ZVE32F-NEXT: beqz t0, .LBB47_5
4210 ; RV64ZVE32F-NEXT: .LBB47_13: # %cond.load10
4211 ; RV64ZVE32F-NEXT: ld t0, 32(a1)
4212 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
4213 ; RV64ZVE32F-NEXT: andi t1, a6, 32
4214 ; RV64ZVE32F-NEXT: beqz t1, .LBB47_6
4215 ; RV64ZVE32F-NEXT: .LBB47_14: # %cond.load13
4216 ; RV64ZVE32F-NEXT: ld t1, 40(a1)
4217 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
4218 ; RV64ZVE32F-NEXT: andi t2, a6, 64
4219 ; RV64ZVE32F-NEXT: beqz t2, .LBB47_7
4220 ; RV64ZVE32F-NEXT: .LBB47_15: # %cond.load16
4221 ; RV64ZVE32F-NEXT: ld t2, 48(a1)
4222 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
4223 ; RV64ZVE32F-NEXT: andi a6, a6, -128
4224 ; RV64ZVE32F-NEXT: beqz a6, .LBB47_8
4225 ; RV64ZVE32F-NEXT: .LBB47_16: # %cond.load19
4226 ; RV64ZVE32F-NEXT: ld a1, 56(a1)
4227 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
4228 ; RV64ZVE32F-NEXT: .LBB47_17: # %else20
4229 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4230 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
4231 ; RV64ZVE32F-NEXT: sd a5, 16(a0)
4232 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
4233 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
4234 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
4235 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
4236 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
4237 ; RV64ZVE32F-NEXT: ret
4238 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
  ret <8 x i64> %v
}
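; i64 gather from a common base with i8 indices; RV32ZVE32F can still form the 32-bit addresses in vector registers before scalarizing the loads.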
4242 define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
4243 ; RV32V-LABEL: mgather_baseidx_v8i8_v8i64:
4245 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4246 ; RV32V-NEXT: vsext.vf4 v10, v8
4247 ; RV32V-NEXT: vsll.vi v8, v10, 3
4248 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
4249 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
4250 ; RV32V-NEXT: vmv.v.v v8, v12
4253 ; RV64V-LABEL: mgather_baseidx_v8i8_v8i64:
4255 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
4256 ; RV64V-NEXT: vsext.vf8 v16, v8
4257 ; RV64V-NEXT: vsll.vi v8, v16, 3
4258 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
4259 ; RV64V-NEXT: vmv.v.v v8, v12
4262 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64:
4263 ; RV32ZVE32F: # %bb.0:
4264 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4265 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
4266 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4267 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4268 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4269 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4270 ; RV32ZVE32F-NEXT: andi a1, t0, 1
4271 ; RV32ZVE32F-NEXT: beqz a1, .LBB48_7
4272 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
4273 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4274 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
4275 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
4276 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
4277 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4278 ; RV32ZVE32F-NEXT: bnez a4, .LBB48_8
4279 ; RV32ZVE32F-NEXT: .LBB48_2:
4280 ; RV32ZVE32F-NEXT: lw a4, 12(a2)
4281 ; RV32ZVE32F-NEXT: lw a5, 8(a2)
4282 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4283 ; RV32ZVE32F-NEXT: bnez a6, .LBB48_9
4284 ; RV32ZVE32F-NEXT: .LBB48_3:
4285 ; RV32ZVE32F-NEXT: lw a6, 20(a2)
4286 ; RV32ZVE32F-NEXT: lw a7, 16(a2)
4287 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4288 ; RV32ZVE32F-NEXT: bnez t1, .LBB48_10
4289 ; RV32ZVE32F-NEXT: .LBB48_4:
4290 ; RV32ZVE32F-NEXT: lw t1, 28(a2)
4291 ; RV32ZVE32F-NEXT: lw t2, 24(a2)
4292 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4293 ; RV32ZVE32F-NEXT: bnez t3, .LBB48_11
4294 ; RV32ZVE32F-NEXT: .LBB48_5:
4295 ; RV32ZVE32F-NEXT: lw t3, 36(a2)
4296 ; RV32ZVE32F-NEXT: lw t4, 32(a2)
4297 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4298 ; RV32ZVE32F-NEXT: bnez t5, .LBB48_12
4299 ; RV32ZVE32F-NEXT: .LBB48_6:
4300 ; RV32ZVE32F-NEXT: lw t5, 44(a2)
4301 ; RV32ZVE32F-NEXT: lw t6, 40(a2)
4302 ; RV32ZVE32F-NEXT: j .LBB48_13
4303 ; RV32ZVE32F-NEXT: .LBB48_7:
4304 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
4305 ; RV32ZVE32F-NEXT: lw a3, 0(a2)
4306 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4307 ; RV32ZVE32F-NEXT: beqz a4, .LBB48_2
4308 ; RV32ZVE32F-NEXT: .LBB48_8: # %cond.load1
4309 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4310 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4311 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
4312 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
4313 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
4314 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4315 ; RV32ZVE32F-NEXT: beqz a6, .LBB48_3
4316 ; RV32ZVE32F-NEXT: .LBB48_9: # %cond.load4
4317 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4318 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4319 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
4320 ; RV32ZVE32F-NEXT: lw a6, 4(a7)
4321 ; RV32ZVE32F-NEXT: lw a7, 0(a7)
4322 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4323 ; RV32ZVE32F-NEXT: beqz t1, .LBB48_4
4324 ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.load7
4325 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4326 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4327 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
4328 ; RV32ZVE32F-NEXT: lw t1, 4(t2)
4329 ; RV32ZVE32F-NEXT: lw t2, 0(t2)
4330 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4331 ; RV32ZVE32F-NEXT: beqz t3, .LBB48_5
4332 ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.load10
4333 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4334 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4335 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
4336 ; RV32ZVE32F-NEXT: lw t3, 4(t4)
4337 ; RV32ZVE32F-NEXT: lw t4, 0(t4)
4338 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4339 ; RV32ZVE32F-NEXT: beqz t5, .LBB48_6
4340 ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.load13
4341 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4342 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4343 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
4344 ; RV32ZVE32F-NEXT: lw t5, 4(t6)
4345 ; RV32ZVE32F-NEXT: lw t6, 0(t6)
4346 ; RV32ZVE32F-NEXT: .LBB48_13: # %else14
4347 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4348 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4349 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4350 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4351 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4352 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4353 ; RV32ZVE32F-NEXT: andi s0, t0, 64
4354 ; RV32ZVE32F-NEXT: beqz s0, .LBB48_16
4355 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
4356 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4357 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4358 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
4359 ; RV32ZVE32F-NEXT: lw s0, 4(s1)
4360 ; RV32ZVE32F-NEXT: lw s1, 0(s1)
4361 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4362 ; RV32ZVE32F-NEXT: bnez t0, .LBB48_17
4363 ; RV32ZVE32F-NEXT: .LBB48_15:
4364 ; RV32ZVE32F-NEXT: lw t0, 60(a2)
4365 ; RV32ZVE32F-NEXT: lw a2, 56(a2)
4366 ; RV32ZVE32F-NEXT: j .LBB48_18
4367 ; RV32ZVE32F-NEXT: .LBB48_16:
4368 ; RV32ZVE32F-NEXT: lw s0, 52(a2)
4369 ; RV32ZVE32F-NEXT: lw s1, 48(a2)
4370 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4371 ; RV32ZVE32F-NEXT: beqz t0, .LBB48_15
4372 ; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load19
4373 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4374 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4375 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
4376 ; RV32ZVE32F-NEXT: lw t0, 4(a2)
4377 ; RV32ZVE32F-NEXT: lw a2, 0(a2)
4378 ; RV32ZVE32F-NEXT: .LBB48_18: # %else20
4379 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4380 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
4381 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
4382 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
4383 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
4384 ; RV32ZVE32F-NEXT: sw a6, 20(a0)
4385 ; RV32ZVE32F-NEXT: sw t2, 24(a0)
4386 ; RV32ZVE32F-NEXT: sw t1, 28(a0)
4387 ; RV32ZVE32F-NEXT: sw t4, 32(a0)
4388 ; RV32ZVE32F-NEXT: sw t3, 36(a0)
4389 ; RV32ZVE32F-NEXT: sw t6, 40(a0)
4390 ; RV32ZVE32F-NEXT: sw t5, 44(a0)
4391 ; RV32ZVE32F-NEXT: sw s1, 48(a0)
4392 ; RV32ZVE32F-NEXT: sw s0, 52(a0)
4393 ; RV32ZVE32F-NEXT: sw a2, 56(a0)
4394 ; RV32ZVE32F-NEXT: sw t0, 60(a0)
4395 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4396 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4397 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4398 ; RV32ZVE32F-NEXT: ret
4400 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64:
4401 ; RV64ZVE32F: # %bb.0:
4402 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4403 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
4404 ; RV64ZVE32F-NEXT: andi a3, a5, 1
4405 ; RV64ZVE32F-NEXT: beqz a3, .LBB48_3
4406 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
4407 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
4408 ; RV64ZVE32F-NEXT: slli a3, a3, 3
4409 ; RV64ZVE32F-NEXT: add a3, a1, a3
4410 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
4411 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4412 ; RV64ZVE32F-NEXT: bnez a4, .LBB48_4
4413 ; RV64ZVE32F-NEXT: .LBB48_2:
4414 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
4415 ; RV64ZVE32F-NEXT: j .LBB48_5
4416 ; RV64ZVE32F-NEXT: .LBB48_3:
4417 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
4418 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4419 ; RV64ZVE32F-NEXT: beqz a4, .LBB48_2
4420 ; RV64ZVE32F-NEXT: .LBB48_4: # %cond.load1
4421 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4422 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4423 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
4424 ; RV64ZVE32F-NEXT: slli a4, a4, 3
4425 ; RV64ZVE32F-NEXT: add a4, a1, a4
4426 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
4427 ; RV64ZVE32F-NEXT: .LBB48_5: # %else2
4428 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
4429 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4430 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
4431 ; RV64ZVE32F-NEXT: andi a6, a5, 4
4432 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4433 ; RV64ZVE32F-NEXT: beqz a6, .LBB48_10
4434 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
4435 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
4436 ; RV64ZVE32F-NEXT: slli a6, a6, 3
4437 ; RV64ZVE32F-NEXT: add a6, a1, a6
4438 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
4439 ; RV64ZVE32F-NEXT: andi a7, a5, 8
4440 ; RV64ZVE32F-NEXT: bnez a7, .LBB48_11
4441 ; RV64ZVE32F-NEXT: .LBB48_7:
4442 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
4443 ; RV64ZVE32F-NEXT: andi t0, a5, 16
4444 ; RV64ZVE32F-NEXT: bnez t0, .LBB48_12
4445 ; RV64ZVE32F-NEXT: .LBB48_8:
4446 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
4447 ; RV64ZVE32F-NEXT: andi t1, a5, 32
4448 ; RV64ZVE32F-NEXT: bnez t1, .LBB48_13
4449 ; RV64ZVE32F-NEXT: .LBB48_9:
4450 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
4451 ; RV64ZVE32F-NEXT: j .LBB48_14
4452 ; RV64ZVE32F-NEXT: .LBB48_10:
4453 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
4454 ; RV64ZVE32F-NEXT: andi a7, a5, 8
4455 ; RV64ZVE32F-NEXT: beqz a7, .LBB48_7
4456 ; RV64ZVE32F-NEXT: .LBB48_11: # %cond.load7
4457 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4458 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
4459 ; RV64ZVE32F-NEXT: slli a7, a7, 3
4460 ; RV64ZVE32F-NEXT: add a7, a1, a7
4461 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
4462 ; RV64ZVE32F-NEXT: andi t0, a5, 16
4463 ; RV64ZVE32F-NEXT: beqz t0, .LBB48_8
4464 ; RV64ZVE32F-NEXT: .LBB48_12: # %cond.load10
4465 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9
4466 ; RV64ZVE32F-NEXT: slli t0, t0, 3
4467 ; RV64ZVE32F-NEXT: add t0, a1, t0
4468 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
4469 ; RV64ZVE32F-NEXT: andi t1, a5, 32
4470 ; RV64ZVE32F-NEXT: beqz t1, .LBB48_9
4471 ; RV64ZVE32F-NEXT: .LBB48_13: # %cond.load13
4472 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4473 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
4474 ; RV64ZVE32F-NEXT: slli t1, t1, 3
4475 ; RV64ZVE32F-NEXT: add t1, a1, t1
4476 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
4477 ; RV64ZVE32F-NEXT: .LBB48_14: # %else14
4478 ; RV64ZVE32F-NEXT: andi t2, a5, 64
4479 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4480 ; RV64ZVE32F-NEXT: beqz t2, .LBB48_17
4481 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
4482 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4483 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4484 ; RV64ZVE32F-NEXT: add t2, a1, t2
4485 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
4486 ; RV64ZVE32F-NEXT: andi a5, a5, -128
4487 ; RV64ZVE32F-NEXT: bnez a5, .LBB48_18
4488 ; RV64ZVE32F-NEXT: .LBB48_16:
4489 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
4490 ; RV64ZVE32F-NEXT: j .LBB48_19
4491 ; RV64ZVE32F-NEXT: .LBB48_17:
4492 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
4493 ; RV64ZVE32F-NEXT: andi a5, a5, -128
4494 ; RV64ZVE32F-NEXT: beqz a5, .LBB48_16
4495 ; RV64ZVE32F-NEXT: .LBB48_18: # %cond.load19
4496 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4497 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
4498 ; RV64ZVE32F-NEXT: slli a2, a2, 3
4499 ; RV64ZVE32F-NEXT: add a1, a1, a2
4500 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
4501 ; RV64ZVE32F-NEXT: .LBB48_19: # %else20
4502 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4503 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
4504 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
4505 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
4506 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
4507 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
4508 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
4509 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
4510 ; RV64ZVE32F-NEXT: ret
4511 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
4512 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
4513 ret <8 x i64> %v
4514 }
4516 define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
4517 ; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4519 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4520 ; RV32V-NEXT: vsext.vf4 v10, v8
4521 ; RV32V-NEXT: vsll.vi v8, v10, 3
4522 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
4523 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
4524 ; RV32V-NEXT: vmv.v.v v8, v12
4527 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4529 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
4530 ; RV64V-NEXT: vsext.vf8 v16, v8
4531 ; RV64V-NEXT: vsll.vi v8, v16, 3
4532 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
4533 ; RV64V-NEXT: vmv.v.v v8, v12
4536 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4537 ; RV32ZVE32F: # %bb.0:
4538 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4539 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
4540 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4541 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4542 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4543 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4544 ; RV32ZVE32F-NEXT: andi a1, t0, 1
4545 ; RV32ZVE32F-NEXT: beqz a1, .LBB49_7
4546 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
4547 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4548 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
4549 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
4550 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
4551 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4552 ; RV32ZVE32F-NEXT: bnez a4, .LBB49_8
4553 ; RV32ZVE32F-NEXT: .LBB49_2:
4554 ; RV32ZVE32F-NEXT: lw a4, 12(a2)
4555 ; RV32ZVE32F-NEXT: lw a5, 8(a2)
4556 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4557 ; RV32ZVE32F-NEXT: bnez a6, .LBB49_9
4558 ; RV32ZVE32F-NEXT: .LBB49_3:
4559 ; RV32ZVE32F-NEXT: lw a6, 20(a2)
4560 ; RV32ZVE32F-NEXT: lw a7, 16(a2)
4561 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4562 ; RV32ZVE32F-NEXT: bnez t1, .LBB49_10
4563 ; RV32ZVE32F-NEXT: .LBB49_4:
4564 ; RV32ZVE32F-NEXT: lw t1, 28(a2)
4565 ; RV32ZVE32F-NEXT: lw t2, 24(a2)
4566 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4567 ; RV32ZVE32F-NEXT: bnez t3, .LBB49_11
4568 ; RV32ZVE32F-NEXT: .LBB49_5:
4569 ; RV32ZVE32F-NEXT: lw t3, 36(a2)
4570 ; RV32ZVE32F-NEXT: lw t4, 32(a2)
4571 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4572 ; RV32ZVE32F-NEXT: bnez t5, .LBB49_12
4573 ; RV32ZVE32F-NEXT: .LBB49_6:
4574 ; RV32ZVE32F-NEXT: lw t5, 44(a2)
4575 ; RV32ZVE32F-NEXT: lw t6, 40(a2)
4576 ; RV32ZVE32F-NEXT: j .LBB49_13
4577 ; RV32ZVE32F-NEXT: .LBB49_7:
4578 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
4579 ; RV32ZVE32F-NEXT: lw a3, 0(a2)
4580 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4581 ; RV32ZVE32F-NEXT: beqz a4, .LBB49_2
4582 ; RV32ZVE32F-NEXT: .LBB49_8: # %cond.load1
4583 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4584 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4585 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
4586 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
4587 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
4588 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4589 ; RV32ZVE32F-NEXT: beqz a6, .LBB49_3
4590 ; RV32ZVE32F-NEXT: .LBB49_9: # %cond.load4
4591 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4592 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4593 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
4594 ; RV32ZVE32F-NEXT: lw a6, 4(a7)
4595 ; RV32ZVE32F-NEXT: lw a7, 0(a7)
4596 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4597 ; RV32ZVE32F-NEXT: beqz t1, .LBB49_4
4598 ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.load7
4599 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4600 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4601 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
4602 ; RV32ZVE32F-NEXT: lw t1, 4(t2)
4603 ; RV32ZVE32F-NEXT: lw t2, 0(t2)
4604 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4605 ; RV32ZVE32F-NEXT: beqz t3, .LBB49_5
4606 ; RV32ZVE32F-NEXT: .LBB49_11: # %cond.load10
4607 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4608 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4609 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
4610 ; RV32ZVE32F-NEXT: lw t3, 4(t4)
4611 ; RV32ZVE32F-NEXT: lw t4, 0(t4)
4612 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4613 ; RV32ZVE32F-NEXT: beqz t5, .LBB49_6
4614 ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.load13
4615 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4616 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4617 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
4618 ; RV32ZVE32F-NEXT: lw t5, 4(t6)
4619 ; RV32ZVE32F-NEXT: lw t6, 0(t6)
4620 ; RV32ZVE32F-NEXT: .LBB49_13: # %else14
4621 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4622 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4623 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4624 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4625 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4626 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4627 ; RV32ZVE32F-NEXT: andi s0, t0, 64
4628 ; RV32ZVE32F-NEXT: beqz s0, .LBB49_16
4629 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
4630 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4631 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4632 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
4633 ; RV32ZVE32F-NEXT: lw s0, 4(s1)
4634 ; RV32ZVE32F-NEXT: lw s1, 0(s1)
4635 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4636 ; RV32ZVE32F-NEXT: bnez t0, .LBB49_17
4637 ; RV32ZVE32F-NEXT: .LBB49_15:
4638 ; RV32ZVE32F-NEXT: lw t0, 60(a2)
4639 ; RV32ZVE32F-NEXT: lw a2, 56(a2)
4640 ; RV32ZVE32F-NEXT: j .LBB49_18
4641 ; RV32ZVE32F-NEXT: .LBB49_16:
4642 ; RV32ZVE32F-NEXT: lw s0, 52(a2)
4643 ; RV32ZVE32F-NEXT: lw s1, 48(a2)
4644 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4645 ; RV32ZVE32F-NEXT: beqz t0, .LBB49_15
4646 ; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load19
4647 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4648 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4649 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
4650 ; RV32ZVE32F-NEXT: lw t0, 4(a2)
4651 ; RV32ZVE32F-NEXT: lw a2, 0(a2)
4652 ; RV32ZVE32F-NEXT: .LBB49_18: # %else20
4653 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4654 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
4655 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
4656 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
4657 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
4658 ; RV32ZVE32F-NEXT: sw a6, 20(a0)
4659 ; RV32ZVE32F-NEXT: sw t2, 24(a0)
4660 ; RV32ZVE32F-NEXT: sw t1, 28(a0)
4661 ; RV32ZVE32F-NEXT: sw t4, 32(a0)
4662 ; RV32ZVE32F-NEXT: sw t3, 36(a0)
4663 ; RV32ZVE32F-NEXT: sw t6, 40(a0)
4664 ; RV32ZVE32F-NEXT: sw t5, 44(a0)
4665 ; RV32ZVE32F-NEXT: sw s1, 48(a0)
4666 ; RV32ZVE32F-NEXT: sw s0, 52(a0)
4667 ; RV32ZVE32F-NEXT: sw a2, 56(a0)
4668 ; RV32ZVE32F-NEXT: sw t0, 60(a0)
4669 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4670 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4671 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4672 ; RV32ZVE32F-NEXT: ret
4674 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64:
4675 ; RV64ZVE32F: # %bb.0:
4676 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4677 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
4678 ; RV64ZVE32F-NEXT: andi a3, a5, 1
4679 ; RV64ZVE32F-NEXT: beqz a3, .LBB49_3
4680 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
4681 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
4682 ; RV64ZVE32F-NEXT: slli a3, a3, 3
4683 ; RV64ZVE32F-NEXT: add a3, a1, a3
4684 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
4685 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4686 ; RV64ZVE32F-NEXT: bnez a4, .LBB49_4
4687 ; RV64ZVE32F-NEXT: .LBB49_2:
4688 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
4689 ; RV64ZVE32F-NEXT: j .LBB49_5
4690 ; RV64ZVE32F-NEXT: .LBB49_3:
4691 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
4692 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4693 ; RV64ZVE32F-NEXT: beqz a4, .LBB49_2
4694 ; RV64ZVE32F-NEXT: .LBB49_4: # %cond.load1
4695 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4696 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4697 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
4698 ; RV64ZVE32F-NEXT: slli a4, a4, 3
4699 ; RV64ZVE32F-NEXT: add a4, a1, a4
4700 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
4701 ; RV64ZVE32F-NEXT: .LBB49_5: # %else2
4702 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
4703 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4704 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
4705 ; RV64ZVE32F-NEXT: andi a6, a5, 4
4706 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4707 ; RV64ZVE32F-NEXT: beqz a6, .LBB49_10
4708 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
4709 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
4710 ; RV64ZVE32F-NEXT: slli a6, a6, 3
4711 ; RV64ZVE32F-NEXT: add a6, a1, a6
4712 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
4713 ; RV64ZVE32F-NEXT: andi a7, a5, 8
4714 ; RV64ZVE32F-NEXT: bnez a7, .LBB49_11
4715 ; RV64ZVE32F-NEXT: .LBB49_7:
4716 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
4717 ; RV64ZVE32F-NEXT: andi t0, a5, 16
4718 ; RV64ZVE32F-NEXT: bnez t0, .LBB49_12
4719 ; RV64ZVE32F-NEXT: .LBB49_8:
4720 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
4721 ; RV64ZVE32F-NEXT: andi t1, a5, 32
4722 ; RV64ZVE32F-NEXT: bnez t1, .LBB49_13
4723 ; RV64ZVE32F-NEXT: .LBB49_9:
4724 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
4725 ; RV64ZVE32F-NEXT: j .LBB49_14
4726 ; RV64ZVE32F-NEXT: .LBB49_10:
4727 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
4728 ; RV64ZVE32F-NEXT: andi a7, a5, 8
4729 ; RV64ZVE32F-NEXT: beqz a7, .LBB49_7
4730 ; RV64ZVE32F-NEXT: .LBB49_11: # %cond.load7
4731 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4732 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
4733 ; RV64ZVE32F-NEXT: slli a7, a7, 3
4734 ; RV64ZVE32F-NEXT: add a7, a1, a7
4735 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
4736 ; RV64ZVE32F-NEXT: andi t0, a5, 16
4737 ; RV64ZVE32F-NEXT: beqz t0, .LBB49_8
4738 ; RV64ZVE32F-NEXT: .LBB49_12: # %cond.load10
4739 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9
4740 ; RV64ZVE32F-NEXT: slli t0, t0, 3
4741 ; RV64ZVE32F-NEXT: add t0, a1, t0
4742 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
4743 ; RV64ZVE32F-NEXT: andi t1, a5, 32
4744 ; RV64ZVE32F-NEXT: beqz t1, .LBB49_9
4745 ; RV64ZVE32F-NEXT: .LBB49_13: # %cond.load13
4746 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
4747 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
4748 ; RV64ZVE32F-NEXT: slli t1, t1, 3
4749 ; RV64ZVE32F-NEXT: add t1, a1, t1
4750 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
4751 ; RV64ZVE32F-NEXT: .LBB49_14: # %else14
4752 ; RV64ZVE32F-NEXT: andi t2, a5, 64
4753 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
4754 ; RV64ZVE32F-NEXT: beqz t2, .LBB49_17
4755 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
4756 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
4757 ; RV64ZVE32F-NEXT: slli t2, t2, 3
4758 ; RV64ZVE32F-NEXT: add t2, a1, t2
4759 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
4760 ; RV64ZVE32F-NEXT: andi a5, a5, -128
4761 ; RV64ZVE32F-NEXT: bnez a5, .LBB49_18
4762 ; RV64ZVE32F-NEXT: .LBB49_16:
4763 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
4764 ; RV64ZVE32F-NEXT: j .LBB49_19
4765 ; RV64ZVE32F-NEXT: .LBB49_17:
4766 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
4767 ; RV64ZVE32F-NEXT: andi a5, a5, -128
4768 ; RV64ZVE32F-NEXT: beqz a5, .LBB49_16
4769 ; RV64ZVE32F-NEXT: .LBB49_18: # %cond.load19
4770 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
4771 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
4772 ; RV64ZVE32F-NEXT: slli a2, a2, 3
4773 ; RV64ZVE32F-NEXT: add a1, a1, a2
4774 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
4775 ; RV64ZVE32F-NEXT: .LBB49_19: # %else20
4776 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
4777 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
4778 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
4779 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
4780 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
4781 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
4782 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
4783 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
4784 ; RV64ZVE32F-NEXT: ret
4785 %eidxs = sext <8 x i8> %idxs to <8 x i64>
4786 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
4787 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
4788 ret <8 x i64> %v
4789 }
4791 define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
4792 ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4794 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4795 ; RV32V-NEXT: vzext.vf2 v9, v8
4796 ; RV32V-NEXT: vsll.vi v8, v9, 3
4797 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
4798 ; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t
4799 ; RV32V-NEXT: vmv.v.v v8, v12
4802 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4804 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4805 ; RV64V-NEXT: vzext.vf2 v9, v8
4806 ; RV64V-NEXT: vsll.vi v8, v9, 3
4807 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
4808 ; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t
4809 ; RV64V-NEXT: vmv.v.v v8, v12
4812 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4813 ; RV32ZVE32F: # %bb.0:
4814 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
4815 ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
4816 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
4817 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
4818 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4819 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
4820 ; RV32ZVE32F-NEXT: andi a1, t0, 1
4821 ; RV32ZVE32F-NEXT: beqz a1, .LBB50_7
4822 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
4823 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
4824 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
4825 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
4826 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
4827 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4828 ; RV32ZVE32F-NEXT: bnez a4, .LBB50_8
4829 ; RV32ZVE32F-NEXT: .LBB50_2:
4830 ; RV32ZVE32F-NEXT: lw a4, 12(a2)
4831 ; RV32ZVE32F-NEXT: lw a5, 8(a2)
4832 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4833 ; RV32ZVE32F-NEXT: bnez a6, .LBB50_9
4834 ; RV32ZVE32F-NEXT: .LBB50_3:
4835 ; RV32ZVE32F-NEXT: lw a6, 20(a2)
4836 ; RV32ZVE32F-NEXT: lw a7, 16(a2)
4837 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4838 ; RV32ZVE32F-NEXT: bnez t1, .LBB50_10
4839 ; RV32ZVE32F-NEXT: .LBB50_4:
4840 ; RV32ZVE32F-NEXT: lw t1, 28(a2)
4841 ; RV32ZVE32F-NEXT: lw t2, 24(a2)
4842 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4843 ; RV32ZVE32F-NEXT: bnez t3, .LBB50_11
4844 ; RV32ZVE32F-NEXT: .LBB50_5:
4845 ; RV32ZVE32F-NEXT: lw t3, 36(a2)
4846 ; RV32ZVE32F-NEXT: lw t4, 32(a2)
4847 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4848 ; RV32ZVE32F-NEXT: bnez t5, .LBB50_12
4849 ; RV32ZVE32F-NEXT: .LBB50_6:
4850 ; RV32ZVE32F-NEXT: lw t5, 44(a2)
4851 ; RV32ZVE32F-NEXT: lw t6, 40(a2)
4852 ; RV32ZVE32F-NEXT: j .LBB50_13
4853 ; RV32ZVE32F-NEXT: .LBB50_7:
4854 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
4855 ; RV32ZVE32F-NEXT: lw a3, 0(a2)
4856 ; RV32ZVE32F-NEXT: andi a4, t0, 2
4857 ; RV32ZVE32F-NEXT: beqz a4, .LBB50_2
4858 ; RV32ZVE32F-NEXT: .LBB50_8: # %cond.load1
4859 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4860 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
4861 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
4862 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
4863 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
4864 ; RV32ZVE32F-NEXT: andi a6, t0, 4
4865 ; RV32ZVE32F-NEXT: beqz a6, .LBB50_3
4866 ; RV32ZVE32F-NEXT: .LBB50_9: # %cond.load4
4867 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4868 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
4869 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
4870 ; RV32ZVE32F-NEXT: lw a6, 4(a7)
4871 ; RV32ZVE32F-NEXT: lw a7, 0(a7)
4872 ; RV32ZVE32F-NEXT: andi t1, t0, 8
4873 ; RV32ZVE32F-NEXT: beqz t1, .LBB50_4
4874 ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.load7
4875 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
4876 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
4877 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
4878 ; RV32ZVE32F-NEXT: lw t1, 4(t2)
4879 ; RV32ZVE32F-NEXT: lw t2, 0(t2)
4880 ; RV32ZVE32F-NEXT: andi t3, t0, 16
4881 ; RV32ZVE32F-NEXT: beqz t3, .LBB50_5
4882 ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.load10
4883 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4884 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
4885 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
4886 ; RV32ZVE32F-NEXT: lw t3, 4(t4)
4887 ; RV32ZVE32F-NEXT: lw t4, 0(t4)
4888 ; RV32ZVE32F-NEXT: andi t5, t0, 32
4889 ; RV32ZVE32F-NEXT: beqz t5, .LBB50_6
4890 ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.load13
4891 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4892 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
4893 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
4894 ; RV32ZVE32F-NEXT: lw t5, 4(t6)
4895 ; RV32ZVE32F-NEXT: lw t6, 0(t6)
4896 ; RV32ZVE32F-NEXT: .LBB50_13: # %else14
4897 ; RV32ZVE32F-NEXT: addi sp, sp, -16
4898 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
4899 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
4900 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
4901 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
4902 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
4903 ; RV32ZVE32F-NEXT: andi s0, t0, 64
4904 ; RV32ZVE32F-NEXT: beqz s0, .LBB50_16
4905 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
4906 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4907 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
4908 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
4909 ; RV32ZVE32F-NEXT: lw s0, 4(s1)
4910 ; RV32ZVE32F-NEXT: lw s1, 0(s1)
4911 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4912 ; RV32ZVE32F-NEXT: bnez t0, .LBB50_17
4913 ; RV32ZVE32F-NEXT: .LBB50_15:
4914 ; RV32ZVE32F-NEXT: lw t0, 60(a2)
4915 ; RV32ZVE32F-NEXT: lw a2, 56(a2)
4916 ; RV32ZVE32F-NEXT: j .LBB50_18
4917 ; RV32ZVE32F-NEXT: .LBB50_16:
4918 ; RV32ZVE32F-NEXT: lw s0, 52(a2)
4919 ; RV32ZVE32F-NEXT: lw s1, 48(a2)
4920 ; RV32ZVE32F-NEXT: andi t0, t0, -128
4921 ; RV32ZVE32F-NEXT: beqz t0, .LBB50_15
4922 ; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load19
4923 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
4924 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
4925 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
4926 ; RV32ZVE32F-NEXT: lw t0, 4(a2)
4927 ; RV32ZVE32F-NEXT: lw a2, 0(a2)
4928 ; RV32ZVE32F-NEXT: .LBB50_18: # %else20
4929 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
4930 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
4931 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
4932 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
4933 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
4934 ; RV32ZVE32F-NEXT: sw a6, 20(a0)
4935 ; RV32ZVE32F-NEXT: sw t2, 24(a0)
4936 ; RV32ZVE32F-NEXT: sw t1, 28(a0)
4937 ; RV32ZVE32F-NEXT: sw t4, 32(a0)
4938 ; RV32ZVE32F-NEXT: sw t3, 36(a0)
4939 ; RV32ZVE32F-NEXT: sw t6, 40(a0)
4940 ; RV32ZVE32F-NEXT: sw t5, 44(a0)
4941 ; RV32ZVE32F-NEXT: sw s1, 48(a0)
4942 ; RV32ZVE32F-NEXT: sw s0, 52(a0)
4943 ; RV32ZVE32F-NEXT: sw a2, 56(a0)
4944 ; RV32ZVE32F-NEXT: sw t0, 60(a0)
4945 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
4946 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
4947 ; RV32ZVE32F-NEXT: addi sp, sp, 16
4948 ; RV32ZVE32F-NEXT: ret
4950 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64:
4951 ; RV64ZVE32F: # %bb.0:
4952 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4953 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
4954 ; RV64ZVE32F-NEXT: andi a3, a5, 1
4955 ; RV64ZVE32F-NEXT: beqz a3, .LBB50_3
4956 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
4957 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
4958 ; RV64ZVE32F-NEXT: andi a3, a3, 255
4959 ; RV64ZVE32F-NEXT: slli a3, a3, 3
4960 ; RV64ZVE32F-NEXT: add a3, a1, a3
4961 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
4962 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4963 ; RV64ZVE32F-NEXT: bnez a4, .LBB50_4
4964 ; RV64ZVE32F-NEXT: .LBB50_2:
4965 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
4966 ; RV64ZVE32F-NEXT: j .LBB50_5
4967 ; RV64ZVE32F-NEXT: .LBB50_3:
4968 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
4969 ; RV64ZVE32F-NEXT: andi a4, a5, 2
4970 ; RV64ZVE32F-NEXT: beqz a4, .LBB50_2
4971 ; RV64ZVE32F-NEXT: .LBB50_4: # %cond.load1
4972 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
4973 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
4974 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
4975 ; RV64ZVE32F-NEXT: andi a4, a4, 255
4976 ; RV64ZVE32F-NEXT: slli a4, a4, 3
4977 ; RV64ZVE32F-NEXT: add a4, a1, a4
4978 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
4979 ; RV64ZVE32F-NEXT: .LBB50_5: # %else2
4980 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
4981 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
4982 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
4983 ; RV64ZVE32F-NEXT: andi a6, a5, 4
4984 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
4985 ; RV64ZVE32F-NEXT: beqz a6, .LBB50_10
4986 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
4987 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
4988 ; RV64ZVE32F-NEXT: andi a6, a6, 255
4989 ; RV64ZVE32F-NEXT: slli a6, a6, 3
4990 ; RV64ZVE32F-NEXT: add a6, a1, a6
4991 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
4992 ; RV64ZVE32F-NEXT: andi a7, a5, 8
4993 ; RV64ZVE32F-NEXT: bnez a7, .LBB50_11
4994 ; RV64ZVE32F-NEXT: .LBB50_7:
4995 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
4996 ; RV64ZVE32F-NEXT: andi t0, a5, 16
4997 ; RV64ZVE32F-NEXT: bnez t0, .LBB50_12
4998 ; RV64ZVE32F-NEXT: .LBB50_8:
4999 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
5000 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5001 ; RV64ZVE32F-NEXT: bnez t1, .LBB50_13
5002 ; RV64ZVE32F-NEXT: .LBB50_9:
5003 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
5004 ; RV64ZVE32F-NEXT: j .LBB50_14
5005 ; RV64ZVE32F-NEXT: .LBB50_10:
5006 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
5007 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5008 ; RV64ZVE32F-NEXT: beqz a7, .LBB50_7
5009 ; RV64ZVE32F-NEXT: .LBB50_11: # %cond.load7
5010 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5011 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
5012 ; RV64ZVE32F-NEXT: andi a7, a7, 255
5013 ; RV64ZVE32F-NEXT: slli a7, a7, 3
5014 ; RV64ZVE32F-NEXT: add a7, a1, a7
5015 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
5016 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5017 ; RV64ZVE32F-NEXT: beqz t0, .LBB50_8
5018 ; RV64ZVE32F-NEXT: .LBB50_12: # %cond.load10
5019 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9
5020 ; RV64ZVE32F-NEXT: andi t0, t0, 255
5021 ; RV64ZVE32F-NEXT: slli t0, t0, 3
5022 ; RV64ZVE32F-NEXT: add t0, a1, t0
5023 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
5024 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5025 ; RV64ZVE32F-NEXT: beqz t1, .LBB50_9
5026 ; RV64ZVE32F-NEXT: .LBB50_13: # %cond.load13
5027 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
5028 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
5029 ; RV64ZVE32F-NEXT: andi t1, t1, 255
5030 ; RV64ZVE32F-NEXT: slli t1, t1, 3
5031 ; RV64ZVE32F-NEXT: add t1, a1, t1
5032 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
5033 ; RV64ZVE32F-NEXT: .LBB50_14: # %else14
5034 ; RV64ZVE32F-NEXT: andi t2, a5, 64
5035 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
5036 ; RV64ZVE32F-NEXT: beqz t2, .LBB50_17
5037 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
5038 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5039 ; RV64ZVE32F-NEXT: andi t2, t2, 255
5040 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5041 ; RV64ZVE32F-NEXT: add t2, a1, t2
5042 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
5043 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5044 ; RV64ZVE32F-NEXT: bnez a5, .LBB50_18
5045 ; RV64ZVE32F-NEXT: .LBB50_16:
5046 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
5047 ; RV64ZVE32F-NEXT: j .LBB50_19
5048 ; RV64ZVE32F-NEXT: .LBB50_17:
5049 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
5050 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5051 ; RV64ZVE32F-NEXT: beqz a5, .LBB50_16
5052 ; RV64ZVE32F-NEXT: .LBB50_18: # %cond.load19
5053 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5054 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
5055 ; RV64ZVE32F-NEXT: andi a2, a2, 255
5056 ; RV64ZVE32F-NEXT: slli a2, a2, 3
5057 ; RV64ZVE32F-NEXT: add a1, a1, a2
5058 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
5059 ; RV64ZVE32F-NEXT: .LBB50_19: # %else20
5060 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5061 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
5062 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
5063 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
5064 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
5065 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
5066 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
5067 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
5068 ; RV64ZVE32F-NEXT: ret
5069 %eidxs = zext <8 x i8> %idxs to <8 x i64>
5070 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5071 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
5072 ret <8 x i64> %v
5073 }
5075 define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
5076 ; RV32V-LABEL: mgather_baseidx_v8i16_v8i64:
5078 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5079 ; RV32V-NEXT: vsext.vf2 v10, v8
5080 ; RV32V-NEXT: vsll.vi v8, v10, 3
5081 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
5082 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
5083 ; RV32V-NEXT: vmv.v.v v8, v12
5086 ; RV64V-LABEL: mgather_baseidx_v8i16_v8i64:
5088 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
5089 ; RV64V-NEXT: vsext.vf4 v16, v8
5090 ; RV64V-NEXT: vsll.vi v8, v16, 3
5091 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
5092 ; RV64V-NEXT: vmv.v.v v8, v12
5095 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
5096 ; RV32ZVE32F: # %bb.0:
5097 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5098 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
5099 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
5100 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5101 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5102 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
5103 ; RV32ZVE32F-NEXT: andi a1, t0, 1
5104 ; RV32ZVE32F-NEXT: beqz a1, .LBB51_7
5105 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
5106 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5107 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
5108 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
5109 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
5110 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5111 ; RV32ZVE32F-NEXT: bnez a4, .LBB51_8
5112 ; RV32ZVE32F-NEXT: .LBB51_2:
5113 ; RV32ZVE32F-NEXT: lw a4, 12(a2)
5114 ; RV32ZVE32F-NEXT: lw a5, 8(a2)
5115 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5116 ; RV32ZVE32F-NEXT: bnez a6, .LBB51_9
5117 ; RV32ZVE32F-NEXT: .LBB51_3:
5118 ; RV32ZVE32F-NEXT: lw a6, 20(a2)
5119 ; RV32ZVE32F-NEXT: lw a7, 16(a2)
5120 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5121 ; RV32ZVE32F-NEXT: bnez t1, .LBB51_10
5122 ; RV32ZVE32F-NEXT: .LBB51_4:
5123 ; RV32ZVE32F-NEXT: lw t1, 28(a2)
5124 ; RV32ZVE32F-NEXT: lw t2, 24(a2)
5125 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5126 ; RV32ZVE32F-NEXT: bnez t3, .LBB51_11
5127 ; RV32ZVE32F-NEXT: .LBB51_5:
5128 ; RV32ZVE32F-NEXT: lw t3, 36(a2)
5129 ; RV32ZVE32F-NEXT: lw t4, 32(a2)
5130 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5131 ; RV32ZVE32F-NEXT: bnez t5, .LBB51_12
5132 ; RV32ZVE32F-NEXT: .LBB51_6:
5133 ; RV32ZVE32F-NEXT: lw t5, 44(a2)
5134 ; RV32ZVE32F-NEXT: lw t6, 40(a2)
5135 ; RV32ZVE32F-NEXT: j .LBB51_13
5136 ; RV32ZVE32F-NEXT: .LBB51_7:
5137 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
5138 ; RV32ZVE32F-NEXT: lw a3, 0(a2)
5139 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5140 ; RV32ZVE32F-NEXT: beqz a4, .LBB51_2
5141 ; RV32ZVE32F-NEXT: .LBB51_8: # %cond.load1
5142 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5143 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5144 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
5145 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
5146 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
5147 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5148 ; RV32ZVE32F-NEXT: beqz a6, .LBB51_3
5149 ; RV32ZVE32F-NEXT: .LBB51_9: # %cond.load4
5150 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5151 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5152 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
5153 ; RV32ZVE32F-NEXT: lw a6, 4(a7)
5154 ; RV32ZVE32F-NEXT: lw a7, 0(a7)
5155 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5156 ; RV32ZVE32F-NEXT: beqz t1, .LBB51_4
5157 ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load7
5158 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5159 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5160 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
5161 ; RV32ZVE32F-NEXT: lw t1, 4(t2)
5162 ; RV32ZVE32F-NEXT: lw t2, 0(t2)
5163 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5164 ; RV32ZVE32F-NEXT: beqz t3, .LBB51_5
5165 ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load10
5166 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5167 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5168 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
5169 ; RV32ZVE32F-NEXT: lw t3, 4(t4)
5170 ; RV32ZVE32F-NEXT: lw t4, 0(t4)
5171 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5172 ; RV32ZVE32F-NEXT: beqz t5, .LBB51_6
5173 ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load13
5174 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5175 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5176 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
5177 ; RV32ZVE32F-NEXT: lw t5, 4(t6)
5178 ; RV32ZVE32F-NEXT: lw t6, 0(t6)
5179 ; RV32ZVE32F-NEXT: .LBB51_13: # %else14
5180 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5181 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5182 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5183 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5184 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5185 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5186 ; RV32ZVE32F-NEXT: andi s0, t0, 64
5187 ; RV32ZVE32F-NEXT: beqz s0, .LBB51_16
5188 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
5189 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5190 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5191 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
5192 ; RV32ZVE32F-NEXT: lw s0, 4(s1)
5193 ; RV32ZVE32F-NEXT: lw s1, 0(s1)
5194 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5195 ; RV32ZVE32F-NEXT: bnez t0, .LBB51_17
5196 ; RV32ZVE32F-NEXT: .LBB51_15:
5197 ; RV32ZVE32F-NEXT: lw t0, 60(a2)
5198 ; RV32ZVE32F-NEXT: lw a2, 56(a2)
5199 ; RV32ZVE32F-NEXT: j .LBB51_18
5200 ; RV32ZVE32F-NEXT: .LBB51_16:
5201 ; RV32ZVE32F-NEXT: lw s0, 52(a2)
5202 ; RV32ZVE32F-NEXT: lw s1, 48(a2)
5203 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5204 ; RV32ZVE32F-NEXT: beqz t0, .LBB51_15
5205 ; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load19
5206 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5207 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5208 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
5209 ; RV32ZVE32F-NEXT: lw t0, 4(a2)
5210 ; RV32ZVE32F-NEXT: lw a2, 0(a2)
5211 ; RV32ZVE32F-NEXT: .LBB51_18: # %else20
5212 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
5213 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
5214 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
5215 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
5216 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
5217 ; RV32ZVE32F-NEXT: sw a6, 20(a0)
5218 ; RV32ZVE32F-NEXT: sw t2, 24(a0)
5219 ; RV32ZVE32F-NEXT: sw t1, 28(a0)
5220 ; RV32ZVE32F-NEXT: sw t4, 32(a0)
5221 ; RV32ZVE32F-NEXT: sw t3, 36(a0)
5222 ; RV32ZVE32F-NEXT: sw t6, 40(a0)
5223 ; RV32ZVE32F-NEXT: sw t5, 44(a0)
5224 ; RV32ZVE32F-NEXT: sw s1, 48(a0)
5225 ; RV32ZVE32F-NEXT: sw s0, 52(a0)
5226 ; RV32ZVE32F-NEXT: sw a2, 56(a0)
5227 ; RV32ZVE32F-NEXT: sw t0, 60(a0)
5228 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5229 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5230 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5231 ; RV32ZVE32F-NEXT: ret
5233 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
5234 ; RV64ZVE32F: # %bb.0:
5235 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5236 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
5237 ; RV64ZVE32F-NEXT: andi a3, a5, 1
5238 ; RV64ZVE32F-NEXT: beqz a3, .LBB51_3
5239 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
5240 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5241 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
5242 ; RV64ZVE32F-NEXT: slli a3, a3, 3
5243 ; RV64ZVE32F-NEXT: add a3, a1, a3
5244 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
5245 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5246 ; RV64ZVE32F-NEXT: bnez a4, .LBB51_4
5247 ; RV64ZVE32F-NEXT: .LBB51_2:
5248 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
5249 ; RV64ZVE32F-NEXT: j .LBB51_5
5250 ; RV64ZVE32F-NEXT: .LBB51_3:
5251 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
5252 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5253 ; RV64ZVE32F-NEXT: beqz a4, .LBB51_2
5254 ; RV64ZVE32F-NEXT: .LBB51_4: # %cond.load1
5255 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5256 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
5257 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
5258 ; RV64ZVE32F-NEXT: slli a4, a4, 3
5259 ; RV64ZVE32F-NEXT: add a4, a1, a4
5260 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
5261 ; RV64ZVE32F-NEXT: .LBB51_5: # %else2
5262 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
5263 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
5264 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
5265 ; RV64ZVE32F-NEXT: andi a6, a5, 4
5266 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5267 ; RV64ZVE32F-NEXT: beqz a6, .LBB51_10
5268 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
5269 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
5270 ; RV64ZVE32F-NEXT: slli a6, a6, 3
5271 ; RV64ZVE32F-NEXT: add a6, a1, a6
5272 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
5273 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5274 ; RV64ZVE32F-NEXT: bnez a7, .LBB51_11
5275 ; RV64ZVE32F-NEXT: .LBB51_7:
5276 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
5277 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5278 ; RV64ZVE32F-NEXT: bnez t0, .LBB51_12
5279 ; RV64ZVE32F-NEXT: .LBB51_8:
5280 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
5281 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5282 ; RV64ZVE32F-NEXT: bnez t1, .LBB51_13
5283 ; RV64ZVE32F-NEXT: .LBB51_9:
5284 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
5285 ; RV64ZVE32F-NEXT: j .LBB51_14
5286 ; RV64ZVE32F-NEXT: .LBB51_10:
5287 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
5288 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5289 ; RV64ZVE32F-NEXT: beqz a7, .LBB51_7
5290 ; RV64ZVE32F-NEXT: .LBB51_11: # %cond.load7
5291 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5292 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
5293 ; RV64ZVE32F-NEXT: slli a7, a7, 3
5294 ; RV64ZVE32F-NEXT: add a7, a1, a7
5295 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
5296 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5297 ; RV64ZVE32F-NEXT: beqz t0, .LBB51_8
5298 ; RV64ZVE32F-NEXT: .LBB51_12: # %cond.load10
5299 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9
5300 ; RV64ZVE32F-NEXT: slli t0, t0, 3
5301 ; RV64ZVE32F-NEXT: add t0, a1, t0
5302 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
5303 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5304 ; RV64ZVE32F-NEXT: beqz t1, .LBB51_9
5305 ; RV64ZVE32F-NEXT: .LBB51_13: # %cond.load13
5306 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
5307 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
5308 ; RV64ZVE32F-NEXT: slli t1, t1, 3
5309 ; RV64ZVE32F-NEXT: add t1, a1, t1
5310 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
5311 ; RV64ZVE32F-NEXT: .LBB51_14: # %else14
5312 ; RV64ZVE32F-NEXT: andi t2, a5, 64
5313 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
5314 ; RV64ZVE32F-NEXT: beqz t2, .LBB51_17
5315 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
5316 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5317 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5318 ; RV64ZVE32F-NEXT: add t2, a1, t2
5319 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
5320 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5321 ; RV64ZVE32F-NEXT: bnez a5, .LBB51_18
5322 ; RV64ZVE32F-NEXT: .LBB51_16:
5323 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
5324 ; RV64ZVE32F-NEXT: j .LBB51_19
5325 ; RV64ZVE32F-NEXT: .LBB51_17:
5326 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
5327 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5328 ; RV64ZVE32F-NEXT: beqz a5, .LBB51_16
5329 ; RV64ZVE32F-NEXT: .LBB51_18: # %cond.load19
5330 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5331 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
5332 ; RV64ZVE32F-NEXT: slli a2, a2, 3
5333 ; RV64ZVE32F-NEXT: add a1, a1, a2
5334 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
5335 ; RV64ZVE32F-NEXT: .LBB51_19: # %else20
5336 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5337 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
5338 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
5339 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
5340 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
5341 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
5342 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
5343 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
5344 ; RV64ZVE32F-NEXT: ret
5345 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
5346 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
5347 ret <8 x i64> %v
5348 }
5350 define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
5351 ; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5353 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5354 ; RV32V-NEXT: vsext.vf2 v10, v8
5355 ; RV32V-NEXT: vsll.vi v8, v10, 3
5356 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
5357 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
5358 ; RV32V-NEXT: vmv.v.v v8, v12
5361 ; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5363 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
5364 ; RV64V-NEXT: vsext.vf4 v16, v8
5365 ; RV64V-NEXT: vsll.vi v8, v16, 3
5366 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
5367 ; RV64V-NEXT: vmv.v.v v8, v12
5370 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5371 ; RV32ZVE32F: # %bb.0:
5372 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5373 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
5374 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
5375 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5376 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5377 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
5378 ; RV32ZVE32F-NEXT: andi a1, t0, 1
5379 ; RV32ZVE32F-NEXT: beqz a1, .LBB52_7
5380 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
5381 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5382 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
5383 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
5384 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
5385 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5386 ; RV32ZVE32F-NEXT: bnez a4, .LBB52_8
5387 ; RV32ZVE32F-NEXT: .LBB52_2:
5388 ; RV32ZVE32F-NEXT: lw a4, 12(a2)
5389 ; RV32ZVE32F-NEXT: lw a5, 8(a2)
5390 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5391 ; RV32ZVE32F-NEXT: bnez a6, .LBB52_9
5392 ; RV32ZVE32F-NEXT: .LBB52_3:
5393 ; RV32ZVE32F-NEXT: lw a6, 20(a2)
5394 ; RV32ZVE32F-NEXT: lw a7, 16(a2)
5395 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5396 ; RV32ZVE32F-NEXT: bnez t1, .LBB52_10
5397 ; RV32ZVE32F-NEXT: .LBB52_4:
5398 ; RV32ZVE32F-NEXT: lw t1, 28(a2)
5399 ; RV32ZVE32F-NEXT: lw t2, 24(a2)
5400 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5401 ; RV32ZVE32F-NEXT: bnez t3, .LBB52_11
5402 ; RV32ZVE32F-NEXT: .LBB52_5:
5403 ; RV32ZVE32F-NEXT: lw t3, 36(a2)
5404 ; RV32ZVE32F-NEXT: lw t4, 32(a2)
5405 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5406 ; RV32ZVE32F-NEXT: bnez t5, .LBB52_12
5407 ; RV32ZVE32F-NEXT: .LBB52_6:
5408 ; RV32ZVE32F-NEXT: lw t5, 44(a2)
5409 ; RV32ZVE32F-NEXT: lw t6, 40(a2)
5410 ; RV32ZVE32F-NEXT: j .LBB52_13
5411 ; RV32ZVE32F-NEXT: .LBB52_7:
5412 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
5413 ; RV32ZVE32F-NEXT: lw a3, 0(a2)
5414 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5415 ; RV32ZVE32F-NEXT: beqz a4, .LBB52_2
5416 ; RV32ZVE32F-NEXT: .LBB52_8: # %cond.load1
5417 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5418 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5419 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
5420 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
5421 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
5422 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5423 ; RV32ZVE32F-NEXT: beqz a6, .LBB52_3
5424 ; RV32ZVE32F-NEXT: .LBB52_9: # %cond.load4
5425 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5426 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5427 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
5428 ; RV32ZVE32F-NEXT: lw a6, 4(a7)
5429 ; RV32ZVE32F-NEXT: lw a7, 0(a7)
5430 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5431 ; RV32ZVE32F-NEXT: beqz t1, .LBB52_4
5432 ; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load7
5433 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5434 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5435 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
5436 ; RV32ZVE32F-NEXT: lw t1, 4(t2)
5437 ; RV32ZVE32F-NEXT: lw t2, 0(t2)
5438 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5439 ; RV32ZVE32F-NEXT: beqz t3, .LBB52_5
5440 ; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load10
5441 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5442 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5443 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
5444 ; RV32ZVE32F-NEXT: lw t3, 4(t4)
5445 ; RV32ZVE32F-NEXT: lw t4, 0(t4)
5446 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5447 ; RV32ZVE32F-NEXT: beqz t5, .LBB52_6
5448 ; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load13
5449 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5450 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5451 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
5452 ; RV32ZVE32F-NEXT: lw t5, 4(t6)
5453 ; RV32ZVE32F-NEXT: lw t6, 0(t6)
5454 ; RV32ZVE32F-NEXT: .LBB52_13: # %else14
5455 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5456 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5457 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5458 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5459 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5460 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5461 ; RV32ZVE32F-NEXT: andi s0, t0, 64
5462 ; RV32ZVE32F-NEXT: beqz s0, .LBB52_16
5463 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
5464 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5465 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5466 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
5467 ; RV32ZVE32F-NEXT: lw s0, 4(s1)
5468 ; RV32ZVE32F-NEXT: lw s1, 0(s1)
5469 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5470 ; RV32ZVE32F-NEXT: bnez t0, .LBB52_17
5471 ; RV32ZVE32F-NEXT: .LBB52_15:
5472 ; RV32ZVE32F-NEXT: lw t0, 60(a2)
5473 ; RV32ZVE32F-NEXT: lw a2, 56(a2)
5474 ; RV32ZVE32F-NEXT: j .LBB52_18
5475 ; RV32ZVE32F-NEXT: .LBB52_16:
5476 ; RV32ZVE32F-NEXT: lw s0, 52(a2)
5477 ; RV32ZVE32F-NEXT: lw s1, 48(a2)
5478 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5479 ; RV32ZVE32F-NEXT: beqz t0, .LBB52_15
5480 ; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load19
5481 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5482 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5483 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
5484 ; RV32ZVE32F-NEXT: lw t0, 4(a2)
5485 ; RV32ZVE32F-NEXT: lw a2, 0(a2)
5486 ; RV32ZVE32F-NEXT: .LBB52_18: # %else20
5487 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
5488 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
5489 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
5490 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
5491 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
5492 ; RV32ZVE32F-NEXT: sw a6, 20(a0)
5493 ; RV32ZVE32F-NEXT: sw t2, 24(a0)
5494 ; RV32ZVE32F-NEXT: sw t1, 28(a0)
5495 ; RV32ZVE32F-NEXT: sw t4, 32(a0)
5496 ; RV32ZVE32F-NEXT: sw t3, 36(a0)
5497 ; RV32ZVE32F-NEXT: sw t6, 40(a0)
5498 ; RV32ZVE32F-NEXT: sw t5, 44(a0)
5499 ; RV32ZVE32F-NEXT: sw s1, 48(a0)
5500 ; RV32ZVE32F-NEXT: sw s0, 52(a0)
5501 ; RV32ZVE32F-NEXT: sw a2, 56(a0)
5502 ; RV32ZVE32F-NEXT: sw t0, 60(a0)
5503 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5504 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5505 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5506 ; RV32ZVE32F-NEXT: ret
5508 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
5509 ; RV64ZVE32F: # %bb.0:
5510 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5511 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
5512 ; RV64ZVE32F-NEXT: andi a3, a5, 1
5513 ; RV64ZVE32F-NEXT: beqz a3, .LBB52_3
5514 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
5515 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5516 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
5517 ; RV64ZVE32F-NEXT: slli a3, a3, 3
5518 ; RV64ZVE32F-NEXT: add a3, a1, a3
5519 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
5520 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5521 ; RV64ZVE32F-NEXT: bnez a4, .LBB52_4
5522 ; RV64ZVE32F-NEXT: .LBB52_2:
5523 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
5524 ; RV64ZVE32F-NEXT: j .LBB52_5
5525 ; RV64ZVE32F-NEXT: .LBB52_3:
5526 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
5527 ; RV64ZVE32F-NEXT: andi a4, a5, 2
5528 ; RV64ZVE32F-NEXT: beqz a4, .LBB52_2
5529 ; RV64ZVE32F-NEXT: .LBB52_4: # %cond.load1
5530 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5531 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
5532 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
5533 ; RV64ZVE32F-NEXT: slli a4, a4, 3
5534 ; RV64ZVE32F-NEXT: add a4, a1, a4
5535 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
5536 ; RV64ZVE32F-NEXT: .LBB52_5: # %else2
5537 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
5538 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
5539 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
5540 ; RV64ZVE32F-NEXT: andi a6, a5, 4
5541 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5542 ; RV64ZVE32F-NEXT: beqz a6, .LBB52_10
5543 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
5544 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
5545 ; RV64ZVE32F-NEXT: slli a6, a6, 3
5546 ; RV64ZVE32F-NEXT: add a6, a1, a6
5547 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
5548 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5549 ; RV64ZVE32F-NEXT: bnez a7, .LBB52_11
5550 ; RV64ZVE32F-NEXT: .LBB52_7:
5551 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
5552 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5553 ; RV64ZVE32F-NEXT: bnez t0, .LBB52_12
5554 ; RV64ZVE32F-NEXT: .LBB52_8:
5555 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
5556 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5557 ; RV64ZVE32F-NEXT: bnez t1, .LBB52_13
5558 ; RV64ZVE32F-NEXT: .LBB52_9:
5559 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
5560 ; RV64ZVE32F-NEXT: j .LBB52_14
5561 ; RV64ZVE32F-NEXT: .LBB52_10:
5562 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
5563 ; RV64ZVE32F-NEXT: andi a7, a5, 8
5564 ; RV64ZVE32F-NEXT: beqz a7, .LBB52_7
5565 ; RV64ZVE32F-NEXT: .LBB52_11: # %cond.load7
5566 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5567 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
5568 ; RV64ZVE32F-NEXT: slli a7, a7, 3
5569 ; RV64ZVE32F-NEXT: add a7, a1, a7
5570 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
5571 ; RV64ZVE32F-NEXT: andi t0, a5, 16
5572 ; RV64ZVE32F-NEXT: beqz t0, .LBB52_8
5573 ; RV64ZVE32F-NEXT: .LBB52_12: # %cond.load10
5574 ; RV64ZVE32F-NEXT: vmv.x.s t0, v9
5575 ; RV64ZVE32F-NEXT: slli t0, t0, 3
5576 ; RV64ZVE32F-NEXT: add t0, a1, t0
5577 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
5578 ; RV64ZVE32F-NEXT: andi t1, a5, 32
5579 ; RV64ZVE32F-NEXT: beqz t1, .LBB52_9
5580 ; RV64ZVE32F-NEXT: .LBB52_13: # %cond.load13
5581 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
5582 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
5583 ; RV64ZVE32F-NEXT: slli t1, t1, 3
5584 ; RV64ZVE32F-NEXT: add t1, a1, t1
5585 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
5586 ; RV64ZVE32F-NEXT: .LBB52_14: # %else14
5587 ; RV64ZVE32F-NEXT: andi t2, a5, 64
5588 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
5589 ; RV64ZVE32F-NEXT: beqz t2, .LBB52_17
5590 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
5591 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5592 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5593 ; RV64ZVE32F-NEXT: add t2, a1, t2
5594 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
5595 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5596 ; RV64ZVE32F-NEXT: bnez a5, .LBB52_18
5597 ; RV64ZVE32F-NEXT: .LBB52_16:
5598 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
5599 ; RV64ZVE32F-NEXT: j .LBB52_19
5600 ; RV64ZVE32F-NEXT: .LBB52_17:
5601 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
5602 ; RV64ZVE32F-NEXT: andi a5, a5, -128
5603 ; RV64ZVE32F-NEXT: beqz a5, .LBB52_16
5604 ; RV64ZVE32F-NEXT: .LBB52_18: # %cond.load19
5605 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5606 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
5607 ; RV64ZVE32F-NEXT: slli a2, a2, 3
5608 ; RV64ZVE32F-NEXT: add a1, a1, a2
5609 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
5610 ; RV64ZVE32F-NEXT: .LBB52_19: # %else20
5611 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5612 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
5613 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
5614 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
5615 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
5616 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
5617 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
5618 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
5619 ; RV64ZVE32F-NEXT: ret
5620 %eidxs = sext <8 x i16> %idxs to <8 x i64>
5621 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5622 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
5623 ret <8 x i64> %v
5624 }
5626 define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
5627 ; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5629 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5630 ; RV32V-NEXT: vzext.vf2 v10, v8
5631 ; RV32V-NEXT: vsll.vi v8, v10, 3
5632 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
5633 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
5634 ; RV32V-NEXT: vmv.v.v v8, v12
5637 ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5639 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5640 ; RV64V-NEXT: vzext.vf2 v10, v8
5641 ; RV64V-NEXT: vsll.vi v8, v10, 3
5642 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
5643 ; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t
5644 ; RV64V-NEXT: vmv.v.v v8, v12
5647 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5648 ; RV32ZVE32F: # %bb.0:
5649 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5650 ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
5651 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
5652 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5653 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5654 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
5655 ; RV32ZVE32F-NEXT: andi a1, t0, 1
5656 ; RV32ZVE32F-NEXT: beqz a1, .LBB53_7
5657 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
5658 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5659 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
5660 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
5661 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
5662 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5663 ; RV32ZVE32F-NEXT: bnez a4, .LBB53_8
5664 ; RV32ZVE32F-NEXT: .LBB53_2:
5665 ; RV32ZVE32F-NEXT: lw a4, 12(a2)
5666 ; RV32ZVE32F-NEXT: lw a5, 8(a2)
5667 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5668 ; RV32ZVE32F-NEXT: bnez a6, .LBB53_9
5669 ; RV32ZVE32F-NEXT: .LBB53_3:
5670 ; RV32ZVE32F-NEXT: lw a6, 20(a2)
5671 ; RV32ZVE32F-NEXT: lw a7, 16(a2)
5672 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5673 ; RV32ZVE32F-NEXT: bnez t1, .LBB53_10
5674 ; RV32ZVE32F-NEXT: .LBB53_4:
5675 ; RV32ZVE32F-NEXT: lw t1, 28(a2)
5676 ; RV32ZVE32F-NEXT: lw t2, 24(a2)
5677 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5678 ; RV32ZVE32F-NEXT: bnez t3, .LBB53_11
5679 ; RV32ZVE32F-NEXT: .LBB53_5:
5680 ; RV32ZVE32F-NEXT: lw t3, 36(a2)
5681 ; RV32ZVE32F-NEXT: lw t4, 32(a2)
5682 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5683 ; RV32ZVE32F-NEXT: bnez t5, .LBB53_12
5684 ; RV32ZVE32F-NEXT: .LBB53_6:
5685 ; RV32ZVE32F-NEXT: lw t5, 44(a2)
5686 ; RV32ZVE32F-NEXT: lw t6, 40(a2)
5687 ; RV32ZVE32F-NEXT: j .LBB53_13
5688 ; RV32ZVE32F-NEXT: .LBB53_7:
5689 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
5690 ; RV32ZVE32F-NEXT: lw a3, 0(a2)
5691 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5692 ; RV32ZVE32F-NEXT: beqz a4, .LBB53_2
5693 ; RV32ZVE32F-NEXT: .LBB53_8: # %cond.load1
5694 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5695 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5696 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
5697 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
5698 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
5699 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5700 ; RV32ZVE32F-NEXT: beqz a6, .LBB53_3
5701 ; RV32ZVE32F-NEXT: .LBB53_9: # %cond.load4
5702 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5703 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5704 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
5705 ; RV32ZVE32F-NEXT: lw a6, 4(a7)
5706 ; RV32ZVE32F-NEXT: lw a7, 0(a7)
5707 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5708 ; RV32ZVE32F-NEXT: beqz t1, .LBB53_4
5709 ; RV32ZVE32F-NEXT: .LBB53_10: # %cond.load7
5710 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5711 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5712 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
5713 ; RV32ZVE32F-NEXT: lw t1, 4(t2)
5714 ; RV32ZVE32F-NEXT: lw t2, 0(t2)
5715 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5716 ; RV32ZVE32F-NEXT: beqz t3, .LBB53_5
5717 ; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load10
5718 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5719 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
5720 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
5721 ; RV32ZVE32F-NEXT: lw t3, 4(t4)
5722 ; RV32ZVE32F-NEXT: lw t4, 0(t4)
5723 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5724 ; RV32ZVE32F-NEXT: beqz t5, .LBB53_6
5725 ; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load13
5726 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5727 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
5728 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
5729 ; RV32ZVE32F-NEXT: lw t5, 4(t6)
5730 ; RV32ZVE32F-NEXT: lw t6, 0(t6)
5731 ; RV32ZVE32F-NEXT: .LBB53_13: # %else14
5732 ; RV32ZVE32F-NEXT: addi sp, sp, -16
5733 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
5734 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
5735 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
5736 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
5737 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
5738 ; RV32ZVE32F-NEXT: andi s0, t0, 64
5739 ; RV32ZVE32F-NEXT: beqz s0, .LBB53_16
5740 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
5741 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5742 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
5743 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
5744 ; RV32ZVE32F-NEXT: lw s0, 4(s1)
5745 ; RV32ZVE32F-NEXT: lw s1, 0(s1)
5746 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5747 ; RV32ZVE32F-NEXT: bnez t0, .LBB53_17
5748 ; RV32ZVE32F-NEXT: .LBB53_15:
5749 ; RV32ZVE32F-NEXT: lw t0, 60(a2)
5750 ; RV32ZVE32F-NEXT: lw a2, 56(a2)
5751 ; RV32ZVE32F-NEXT: j .LBB53_18
5752 ; RV32ZVE32F-NEXT: .LBB53_16:
5753 ; RV32ZVE32F-NEXT: lw s0, 52(a2)
5754 ; RV32ZVE32F-NEXT: lw s1, 48(a2)
5755 ; RV32ZVE32F-NEXT: andi t0, t0, -128
5756 ; RV32ZVE32F-NEXT: beqz t0, .LBB53_15
5757 ; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load19
5758 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
5759 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
5760 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
5761 ; RV32ZVE32F-NEXT: lw t0, 4(a2)
5762 ; RV32ZVE32F-NEXT: lw a2, 0(a2)
5763 ; RV32ZVE32F-NEXT: .LBB53_18: # %else20
5764 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
5765 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
5766 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
5767 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
5768 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
5769 ; RV32ZVE32F-NEXT: sw a6, 20(a0)
5770 ; RV32ZVE32F-NEXT: sw t2, 24(a0)
5771 ; RV32ZVE32F-NEXT: sw t1, 28(a0)
5772 ; RV32ZVE32F-NEXT: sw t4, 32(a0)
5773 ; RV32ZVE32F-NEXT: sw t3, 36(a0)
5774 ; RV32ZVE32F-NEXT: sw t6, 40(a0)
5775 ; RV32ZVE32F-NEXT: sw t5, 44(a0)
5776 ; RV32ZVE32F-NEXT: sw s1, 48(a0)
5777 ; RV32ZVE32F-NEXT: sw s0, 52(a0)
5778 ; RV32ZVE32F-NEXT: sw a2, 56(a0)
5779 ; RV32ZVE32F-NEXT: sw t0, 60(a0)
5780 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
5781 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
5782 ; RV32ZVE32F-NEXT: addi sp, sp, 16
5783 ; RV32ZVE32F-NEXT: ret
5785 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64:
5786 ; RV64ZVE32F: # %bb.0:
5787 ; RV64ZVE32F-NEXT: lui a5, 16
5788 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5789 ; RV64ZVE32F-NEXT: vmv.x.s a6, v0
5790 ; RV64ZVE32F-NEXT: andi a3, a6, 1
5791 ; RV64ZVE32F-NEXT: addiw a5, a5, -1
5792 ; RV64ZVE32F-NEXT: beqz a3, .LBB53_3
5793 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
5794 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
5795 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
5796 ; RV64ZVE32F-NEXT: and a3, a3, a5
5797 ; RV64ZVE32F-NEXT: slli a3, a3, 3
5798 ; RV64ZVE32F-NEXT: add a3, a1, a3
5799 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
5800 ; RV64ZVE32F-NEXT: andi a4, a6, 2
5801 ; RV64ZVE32F-NEXT: bnez a4, .LBB53_4
5802 ; RV64ZVE32F-NEXT: .LBB53_2:
5803 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
5804 ; RV64ZVE32F-NEXT: j .LBB53_5
5805 ; RV64ZVE32F-NEXT: .LBB53_3:
5806 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
5807 ; RV64ZVE32F-NEXT: andi a4, a6, 2
5808 ; RV64ZVE32F-NEXT: beqz a4, .LBB53_2
5809 ; RV64ZVE32F-NEXT: .LBB53_4: # %cond.load1
5810 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
5811 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
5812 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
5813 ; RV64ZVE32F-NEXT: and a4, a4, a5
5814 ; RV64ZVE32F-NEXT: slli a4, a4, 3
5815 ; RV64ZVE32F-NEXT: add a4, a1, a4
5816 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
5817 ; RV64ZVE32F-NEXT: .LBB53_5: # %else2
5818 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
5819 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
5820 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
5821 ; RV64ZVE32F-NEXT: andi a7, a6, 4
5822 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
5823 ; RV64ZVE32F-NEXT: beqz a7, .LBB53_10
5824 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
5825 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
5826 ; RV64ZVE32F-NEXT: and a7, a7, a5
5827 ; RV64ZVE32F-NEXT: slli a7, a7, 3
5828 ; RV64ZVE32F-NEXT: add a7, a1, a7
5829 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
5830 ; RV64ZVE32F-NEXT: andi t0, a6, 8
5831 ; RV64ZVE32F-NEXT: bnez t0, .LBB53_11
5832 ; RV64ZVE32F-NEXT: .LBB53_7:
5833 ; RV64ZVE32F-NEXT: ld t0, 24(a2)
5834 ; RV64ZVE32F-NEXT: andi t1, a6, 16
5835 ; RV64ZVE32F-NEXT: bnez t1, .LBB53_12
5836 ; RV64ZVE32F-NEXT: .LBB53_8:
5837 ; RV64ZVE32F-NEXT: ld t1, 32(a2)
5838 ; RV64ZVE32F-NEXT: andi t2, a6, 32
5839 ; RV64ZVE32F-NEXT: bnez t2, .LBB53_13
5840 ; RV64ZVE32F-NEXT: .LBB53_9:
5841 ; RV64ZVE32F-NEXT: ld t2, 40(a2)
5842 ; RV64ZVE32F-NEXT: j .LBB53_14
5843 ; RV64ZVE32F-NEXT: .LBB53_10:
5844 ; RV64ZVE32F-NEXT: ld a7, 16(a2)
5845 ; RV64ZVE32F-NEXT: andi t0, a6, 8
5846 ; RV64ZVE32F-NEXT: beqz t0, .LBB53_7
5847 ; RV64ZVE32F-NEXT: .LBB53_11: # %cond.load7
5848 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5849 ; RV64ZVE32F-NEXT: vmv.x.s t0, v8
5850 ; RV64ZVE32F-NEXT: and t0, t0, a5
5851 ; RV64ZVE32F-NEXT: slli t0, t0, 3
5852 ; RV64ZVE32F-NEXT: add t0, a1, t0
5853 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
5854 ; RV64ZVE32F-NEXT: andi t1, a6, 16
5855 ; RV64ZVE32F-NEXT: beqz t1, .LBB53_8
5856 ; RV64ZVE32F-NEXT: .LBB53_12: # %cond.load10
5857 ; RV64ZVE32F-NEXT: vmv.x.s t1, v9
5858 ; RV64ZVE32F-NEXT: and t1, t1, a5
5859 ; RV64ZVE32F-NEXT: slli t1, t1, 3
5860 ; RV64ZVE32F-NEXT: add t1, a1, t1
5861 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
5862 ; RV64ZVE32F-NEXT: andi t2, a6, 32
5863 ; RV64ZVE32F-NEXT: beqz t2, .LBB53_9
5864 ; RV64ZVE32F-NEXT: .LBB53_13: # %cond.load13
5865 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
5866 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
5867 ; RV64ZVE32F-NEXT: and t2, t2, a5
5868 ; RV64ZVE32F-NEXT: slli t2, t2, 3
5869 ; RV64ZVE32F-NEXT: add t2, a1, t2
5870 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
5871 ; RV64ZVE32F-NEXT: .LBB53_14: # %else14
5872 ; RV64ZVE32F-NEXT: andi t3, a6, 64
5873 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
5874 ; RV64ZVE32F-NEXT: beqz t3, .LBB53_17
5875 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
5876 ; RV64ZVE32F-NEXT: vmv.x.s t3, v8
5877 ; RV64ZVE32F-NEXT: and t3, t3, a5
5878 ; RV64ZVE32F-NEXT: slli t3, t3, 3
5879 ; RV64ZVE32F-NEXT: add t3, a1, t3
5880 ; RV64ZVE32F-NEXT: ld t3, 0(t3)
5881 ; RV64ZVE32F-NEXT: andi a6, a6, -128
5882 ; RV64ZVE32F-NEXT: bnez a6, .LBB53_18
5883 ; RV64ZVE32F-NEXT: .LBB53_16:
5884 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
5885 ; RV64ZVE32F-NEXT: j .LBB53_19
5886 ; RV64ZVE32F-NEXT: .LBB53_17:
5887 ; RV64ZVE32F-NEXT: ld t3, 48(a2)
5888 ; RV64ZVE32F-NEXT: andi a6, a6, -128
5889 ; RV64ZVE32F-NEXT: beqz a6, .LBB53_16
5890 ; RV64ZVE32F-NEXT: .LBB53_18: # %cond.load19
5891 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
5892 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
5893 ; RV64ZVE32F-NEXT: and a2, a2, a5
5894 ; RV64ZVE32F-NEXT: slli a2, a2, 3
5895 ; RV64ZVE32F-NEXT: add a1, a1, a2
5896 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
5897 ; RV64ZVE32F-NEXT: .LBB53_19: # %else20
5898 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
5899 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
5900 ; RV64ZVE32F-NEXT: sd a7, 16(a0)
5901 ; RV64ZVE32F-NEXT: sd t0, 24(a0)
5902 ; RV64ZVE32F-NEXT: sd t1, 32(a0)
5903 ; RV64ZVE32F-NEXT: sd t2, 40(a0)
5904 ; RV64ZVE32F-NEXT: sd t3, 48(a0)
5905 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
5906 ; RV64ZVE32F-NEXT: ret
5907 %eidxs = zext <8 x i16> %idxs to <8 x i64>
5908 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
5909 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
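; Gather of <8 x i64> indexed directly by i32 values. RV32V can use the 32-bit offsets with vluxei32 after scaling; RV64V sign-extends them to 64 bits with vsext.vf2; the ZVE32F configurations fall back to scalar loads for the 64-bit elements.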
5913 define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
5914 ; RV32V-LABEL: mgather_baseidx_v8i32_v8i64:
5916 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5917 ; RV32V-NEXT: vsll.vi v8, v8, 3
5918 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
5919 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
5920 ; RV32V-NEXT: vmv.v.v v8, v12
5923 ; RV64V-LABEL: mgather_baseidx_v8i32_v8i64:
5925 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
5926 ; RV64V-NEXT: vsext.vf2 v16, v8
5927 ; RV64V-NEXT: vsll.vi v8, v16, 3
5928 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
5929 ; RV64V-NEXT: vmv.v.v v8, v12
5932 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64:
5933 ; RV32ZVE32F: # %bb.0:
5934 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
5935 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
5936 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
5937 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
5938 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
5939 ; RV32ZVE32F-NEXT: andi a1, t0, 1
5940 ; RV32ZVE32F-NEXT: beqz a1, .LBB54_7
5941 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
5942 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
5943 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
5944 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
5945 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
5946 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5947 ; RV32ZVE32F-NEXT: bnez a4, .LBB54_8
5948 ; RV32ZVE32F-NEXT: .LBB54_2:
5949 ; RV32ZVE32F-NEXT: lw a4, 12(a2)
5950 ; RV32ZVE32F-NEXT: lw a5, 8(a2)
5951 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5952 ; RV32ZVE32F-NEXT: bnez a6, .LBB54_9
5953 ; RV32ZVE32F-NEXT: .LBB54_3:
5954 ; RV32ZVE32F-NEXT: lw a6, 20(a2)
5955 ; RV32ZVE32F-NEXT: lw a7, 16(a2)
5956 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5957 ; RV32ZVE32F-NEXT: bnez t1, .LBB54_10
5958 ; RV32ZVE32F-NEXT: .LBB54_4:
5959 ; RV32ZVE32F-NEXT: lw t1, 28(a2)
5960 ; RV32ZVE32F-NEXT: lw t2, 24(a2)
5961 ; RV32ZVE32F-NEXT: andi t3, t0, 16
5962 ; RV32ZVE32F-NEXT: bnez t3, .LBB54_11
5963 ; RV32ZVE32F-NEXT: .LBB54_5:
5964 ; RV32ZVE32F-NEXT: lw t3, 36(a2)
5965 ; RV32ZVE32F-NEXT: lw t4, 32(a2)
5966 ; RV32ZVE32F-NEXT: andi t5, t0, 32
5967 ; RV32ZVE32F-NEXT: bnez t5, .LBB54_12
5968 ; RV32ZVE32F-NEXT: .LBB54_6:
5969 ; RV32ZVE32F-NEXT: lw t5, 44(a2)
5970 ; RV32ZVE32F-NEXT: lw t6, 40(a2)
5971 ; RV32ZVE32F-NEXT: j .LBB54_13
5972 ; RV32ZVE32F-NEXT: .LBB54_7:
5973 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
5974 ; RV32ZVE32F-NEXT: lw a3, 0(a2)
5975 ; RV32ZVE32F-NEXT: andi a4, t0, 2
5976 ; RV32ZVE32F-NEXT: beqz a4, .LBB54_2
5977 ; RV32ZVE32F-NEXT: .LBB54_8: # %cond.load1
5978 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5979 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
5980 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
5981 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
5982 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
5983 ; RV32ZVE32F-NEXT: andi a6, t0, 4
5984 ; RV32ZVE32F-NEXT: beqz a6, .LBB54_3
5985 ; RV32ZVE32F-NEXT: .LBB54_9: # %cond.load4
5986 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5987 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
5988 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
5989 ; RV32ZVE32F-NEXT: lw a6, 4(a7)
5990 ; RV32ZVE32F-NEXT: lw a7, 0(a7)
5991 ; RV32ZVE32F-NEXT: andi t1, t0, 8
5992 ; RV32ZVE32F-NEXT: beqz t1, .LBB54_4
5993 ; RV32ZVE32F-NEXT: .LBB54_10: # %cond.load7
5994 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
5995 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
5996 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
5997 ; RV32ZVE32F-NEXT: lw t1, 4(t2)
5998 ; RV32ZVE32F-NEXT: lw t2, 0(t2)
5999 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6000 ; RV32ZVE32F-NEXT: beqz t3, .LBB54_5
6001 ; RV32ZVE32F-NEXT: .LBB54_11: # %cond.load10
6002 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6003 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6004 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
6005 ; RV32ZVE32F-NEXT: lw t3, 4(t4)
6006 ; RV32ZVE32F-NEXT: lw t4, 0(t4)
6007 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6008 ; RV32ZVE32F-NEXT: beqz t5, .LBB54_6
6009 ; RV32ZVE32F-NEXT: .LBB54_12: # %cond.load13
6010 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6011 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
6012 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
6013 ; RV32ZVE32F-NEXT: lw t5, 4(t6)
6014 ; RV32ZVE32F-NEXT: lw t6, 0(t6)
6015 ; RV32ZVE32F-NEXT: .LBB54_13: # %else14
6016 ; RV32ZVE32F-NEXT: addi sp, sp, -16
6017 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
6018 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
6019 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
6020 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
6021 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
6022 ; RV32ZVE32F-NEXT: andi s0, t0, 64
6023 ; RV32ZVE32F-NEXT: beqz s0, .LBB54_16
6024 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
6025 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6026 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6027 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
6028 ; RV32ZVE32F-NEXT: lw s0, 4(s1)
6029 ; RV32ZVE32F-NEXT: lw s1, 0(s1)
6030 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6031 ; RV32ZVE32F-NEXT: bnez t0, .LBB54_17
6032 ; RV32ZVE32F-NEXT: .LBB54_15:
6033 ; RV32ZVE32F-NEXT: lw t0, 60(a2)
6034 ; RV32ZVE32F-NEXT: lw a2, 56(a2)
6035 ; RV32ZVE32F-NEXT: j .LBB54_18
6036 ; RV32ZVE32F-NEXT: .LBB54_16:
6037 ; RV32ZVE32F-NEXT: lw s0, 52(a2)
6038 ; RV32ZVE32F-NEXT: lw s1, 48(a2)
6039 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6040 ; RV32ZVE32F-NEXT: beqz t0, .LBB54_15
6041 ; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load19
6042 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6043 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6044 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
6045 ; RV32ZVE32F-NEXT: lw t0, 4(a2)
6046 ; RV32ZVE32F-NEXT: lw a2, 0(a2)
6047 ; RV32ZVE32F-NEXT: .LBB54_18: # %else20
6048 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
6049 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
6050 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
6051 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
6052 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
6053 ; RV32ZVE32F-NEXT: sw a6, 20(a0)
6054 ; RV32ZVE32F-NEXT: sw t2, 24(a0)
6055 ; RV32ZVE32F-NEXT: sw t1, 28(a0)
6056 ; RV32ZVE32F-NEXT: sw t4, 32(a0)
6057 ; RV32ZVE32F-NEXT: sw t3, 36(a0)
6058 ; RV32ZVE32F-NEXT: sw t6, 40(a0)
6059 ; RV32ZVE32F-NEXT: sw t5, 44(a0)
6060 ; RV32ZVE32F-NEXT: sw s1, 48(a0)
6061 ; RV32ZVE32F-NEXT: sw s0, 52(a0)
6062 ; RV32ZVE32F-NEXT: sw a2, 56(a0)
6063 ; RV32ZVE32F-NEXT: sw t0, 60(a0)
6064 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
6065 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
6066 ; RV32ZVE32F-NEXT: addi sp, sp, 16
6067 ; RV32ZVE32F-NEXT: ret
6069 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64:
6070 ; RV64ZVE32F: # %bb.0:
6071 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6072 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
6073 ; RV64ZVE32F-NEXT: andi a3, a5, 1
6074 ; RV64ZVE32F-NEXT: beqz a3, .LBB54_3
6075 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
6076 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6077 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
6078 ; RV64ZVE32F-NEXT: slli a3, a3, 3
6079 ; RV64ZVE32F-NEXT: add a3, a1, a3
6080 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
6081 ; RV64ZVE32F-NEXT: andi a4, a5, 2
6082 ; RV64ZVE32F-NEXT: bnez a4, .LBB54_4
6083 ; RV64ZVE32F-NEXT: .LBB54_2:
6084 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
6085 ; RV64ZVE32F-NEXT: j .LBB54_5
6086 ; RV64ZVE32F-NEXT: .LBB54_3:
6087 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
6088 ; RV64ZVE32F-NEXT: andi a4, a5, 2
6089 ; RV64ZVE32F-NEXT: beqz a4, .LBB54_2
6090 ; RV64ZVE32F-NEXT: .LBB54_4: # %cond.load1
6091 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6092 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6093 ; RV64ZVE32F-NEXT: vmv.x.s a4, v10
6094 ; RV64ZVE32F-NEXT: slli a4, a4, 3
6095 ; RV64ZVE32F-NEXT: add a4, a1, a4
6096 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
6097 ; RV64ZVE32F-NEXT: .LBB54_5: # %else2
6098 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
6099 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6100 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
6101 ; RV64ZVE32F-NEXT: andi a6, a5, 4
6102 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
6103 ; RV64ZVE32F-NEXT: beqz a6, .LBB54_10
6104 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
6105 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
6106 ; RV64ZVE32F-NEXT: slli a6, a6, 3
6107 ; RV64ZVE32F-NEXT: add a6, a1, a6
6108 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
6109 ; RV64ZVE32F-NEXT: andi a7, a5, 8
6110 ; RV64ZVE32F-NEXT: bnez a7, .LBB54_11
6111 ; RV64ZVE32F-NEXT: .LBB54_7:
6112 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
6113 ; RV64ZVE32F-NEXT: andi t0, a5, 16
6114 ; RV64ZVE32F-NEXT: bnez t0, .LBB54_12
6115 ; RV64ZVE32F-NEXT: .LBB54_8:
6116 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
6117 ; RV64ZVE32F-NEXT: andi t1, a5, 32
6118 ; RV64ZVE32F-NEXT: bnez t1, .LBB54_13
6119 ; RV64ZVE32F-NEXT: .LBB54_9:
6120 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
6121 ; RV64ZVE32F-NEXT: j .LBB54_14
6122 ; RV64ZVE32F-NEXT: .LBB54_10:
6123 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
6124 ; RV64ZVE32F-NEXT: andi a7, a5, 8
6125 ; RV64ZVE32F-NEXT: beqz a7, .LBB54_7
6126 ; RV64ZVE32F-NEXT: .LBB54_11: # %cond.load7
6127 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6128 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
6129 ; RV64ZVE32F-NEXT: slli a7, a7, 3
6130 ; RV64ZVE32F-NEXT: add a7, a1, a7
6131 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
6132 ; RV64ZVE32F-NEXT: andi t0, a5, 16
6133 ; RV64ZVE32F-NEXT: beqz t0, .LBB54_8
6134 ; RV64ZVE32F-NEXT: .LBB54_12: # %cond.load10
6135 ; RV64ZVE32F-NEXT: vmv.x.s t0, v10
6136 ; RV64ZVE32F-NEXT: slli t0, t0, 3
6137 ; RV64ZVE32F-NEXT: add t0, a1, t0
6138 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
6139 ; RV64ZVE32F-NEXT: andi t1, a5, 32
6140 ; RV64ZVE32F-NEXT: beqz t1, .LBB54_9
6141 ; RV64ZVE32F-NEXT: .LBB54_13: # %cond.load13
6142 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
6143 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
6144 ; RV64ZVE32F-NEXT: slli t1, t1, 3
6145 ; RV64ZVE32F-NEXT: add t1, a1, t1
6146 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
6147 ; RV64ZVE32F-NEXT: .LBB54_14: # %else14
6148 ; RV64ZVE32F-NEXT: andi t2, a5, 64
6149 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
6150 ; RV64ZVE32F-NEXT: beqz t2, .LBB54_17
6151 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
6152 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
6153 ; RV64ZVE32F-NEXT: slli t2, t2, 3
6154 ; RV64ZVE32F-NEXT: add t2, a1, t2
6155 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
6156 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6157 ; RV64ZVE32F-NEXT: bnez a5, .LBB54_18
6158 ; RV64ZVE32F-NEXT: .LBB54_16:
6159 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
6160 ; RV64ZVE32F-NEXT: j .LBB54_19
6161 ; RV64ZVE32F-NEXT: .LBB54_17:
6162 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
6163 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6164 ; RV64ZVE32F-NEXT: beqz a5, .LBB54_16
6165 ; RV64ZVE32F-NEXT: .LBB54_18: # %cond.load19
6166 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6167 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
6168 ; RV64ZVE32F-NEXT: slli a2, a2, 3
6169 ; RV64ZVE32F-NEXT: add a1, a1, a2
6170 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
6171 ; RV64ZVE32F-NEXT: .LBB54_19: # %else20
6172 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
6173 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
6174 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
6175 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
6176 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
6177 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
6178 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
6179 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
6180 ; RV64ZVE32F-NEXT: ret
6181 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
6182 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
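; Same gather with explicitly sign-extended i32 indices; the checked lowering matches the plain v8i32 index case above.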
6186 define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
6187 ; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6189 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6190 ; RV32V-NEXT: vsll.vi v8, v8, 3
6191 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
6192 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
6193 ; RV32V-NEXT: vmv.v.v v8, v12
6196 ; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6198 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
6199 ; RV64V-NEXT: vsext.vf2 v16, v8
6200 ; RV64V-NEXT: vsll.vi v8, v16, 3
6201 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
6202 ; RV64V-NEXT: vmv.v.v v8, v12
6205 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6206 ; RV32ZVE32F: # %bb.0:
6207 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6208 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
6209 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
6210 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6211 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
6212 ; RV32ZVE32F-NEXT: andi a1, t0, 1
6213 ; RV32ZVE32F-NEXT: beqz a1, .LBB55_7
6214 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
6215 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6216 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
6217 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
6218 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
6219 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6220 ; RV32ZVE32F-NEXT: bnez a4, .LBB55_8
6221 ; RV32ZVE32F-NEXT: .LBB55_2:
6222 ; RV32ZVE32F-NEXT: lw a4, 12(a2)
6223 ; RV32ZVE32F-NEXT: lw a5, 8(a2)
6224 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6225 ; RV32ZVE32F-NEXT: bnez a6, .LBB55_9
6226 ; RV32ZVE32F-NEXT: .LBB55_3:
6227 ; RV32ZVE32F-NEXT: lw a6, 20(a2)
6228 ; RV32ZVE32F-NEXT: lw a7, 16(a2)
6229 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6230 ; RV32ZVE32F-NEXT: bnez t1, .LBB55_10
6231 ; RV32ZVE32F-NEXT: .LBB55_4:
6232 ; RV32ZVE32F-NEXT: lw t1, 28(a2)
6233 ; RV32ZVE32F-NEXT: lw t2, 24(a2)
6234 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6235 ; RV32ZVE32F-NEXT: bnez t3, .LBB55_11
6236 ; RV32ZVE32F-NEXT: .LBB55_5:
6237 ; RV32ZVE32F-NEXT: lw t3, 36(a2)
6238 ; RV32ZVE32F-NEXT: lw t4, 32(a2)
6239 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6240 ; RV32ZVE32F-NEXT: bnez t5, .LBB55_12
6241 ; RV32ZVE32F-NEXT: .LBB55_6:
6242 ; RV32ZVE32F-NEXT: lw t5, 44(a2)
6243 ; RV32ZVE32F-NEXT: lw t6, 40(a2)
6244 ; RV32ZVE32F-NEXT: j .LBB55_13
6245 ; RV32ZVE32F-NEXT: .LBB55_7:
6246 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
6247 ; RV32ZVE32F-NEXT: lw a3, 0(a2)
6248 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6249 ; RV32ZVE32F-NEXT: beqz a4, .LBB55_2
6250 ; RV32ZVE32F-NEXT: .LBB55_8: # %cond.load1
6251 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6252 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6253 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
6254 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
6255 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
6256 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6257 ; RV32ZVE32F-NEXT: beqz a6, .LBB55_3
6258 ; RV32ZVE32F-NEXT: .LBB55_9: # %cond.load4
6259 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6260 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
6261 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
6262 ; RV32ZVE32F-NEXT: lw a6, 4(a7)
6263 ; RV32ZVE32F-NEXT: lw a7, 0(a7)
6264 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6265 ; RV32ZVE32F-NEXT: beqz t1, .LBB55_4
6266 ; RV32ZVE32F-NEXT: .LBB55_10: # %cond.load7
6267 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6268 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
6269 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
6270 ; RV32ZVE32F-NEXT: lw t1, 4(t2)
6271 ; RV32ZVE32F-NEXT: lw t2, 0(t2)
6272 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6273 ; RV32ZVE32F-NEXT: beqz t3, .LBB55_5
6274 ; RV32ZVE32F-NEXT: .LBB55_11: # %cond.load10
6275 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6276 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6277 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
6278 ; RV32ZVE32F-NEXT: lw t3, 4(t4)
6279 ; RV32ZVE32F-NEXT: lw t4, 0(t4)
6280 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6281 ; RV32ZVE32F-NEXT: beqz t5, .LBB55_6
6282 ; RV32ZVE32F-NEXT: .LBB55_12: # %cond.load13
6283 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6284 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
6285 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
6286 ; RV32ZVE32F-NEXT: lw t5, 4(t6)
6287 ; RV32ZVE32F-NEXT: lw t6, 0(t6)
6288 ; RV32ZVE32F-NEXT: .LBB55_13: # %else14
6289 ; RV32ZVE32F-NEXT: addi sp, sp, -16
6290 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
6291 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
6292 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
6293 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
6294 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
6295 ; RV32ZVE32F-NEXT: andi s0, t0, 64
6296 ; RV32ZVE32F-NEXT: beqz s0, .LBB55_16
6297 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
6298 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6299 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6300 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
6301 ; RV32ZVE32F-NEXT: lw s0, 4(s1)
6302 ; RV32ZVE32F-NEXT: lw s1, 0(s1)
6303 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6304 ; RV32ZVE32F-NEXT: bnez t0, .LBB55_17
6305 ; RV32ZVE32F-NEXT: .LBB55_15:
6306 ; RV32ZVE32F-NEXT: lw t0, 60(a2)
6307 ; RV32ZVE32F-NEXT: lw a2, 56(a2)
6308 ; RV32ZVE32F-NEXT: j .LBB55_18
6309 ; RV32ZVE32F-NEXT: .LBB55_16:
6310 ; RV32ZVE32F-NEXT: lw s0, 52(a2)
6311 ; RV32ZVE32F-NEXT: lw s1, 48(a2)
6312 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6313 ; RV32ZVE32F-NEXT: beqz t0, .LBB55_15
6314 ; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load19
6315 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6316 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6317 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
6318 ; RV32ZVE32F-NEXT: lw t0, 4(a2)
6319 ; RV32ZVE32F-NEXT: lw a2, 0(a2)
6320 ; RV32ZVE32F-NEXT: .LBB55_18: # %else20
6321 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
6322 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
6323 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
6324 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
6325 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
6326 ; RV32ZVE32F-NEXT: sw a6, 20(a0)
6327 ; RV32ZVE32F-NEXT: sw t2, 24(a0)
6328 ; RV32ZVE32F-NEXT: sw t1, 28(a0)
6329 ; RV32ZVE32F-NEXT: sw t4, 32(a0)
6330 ; RV32ZVE32F-NEXT: sw t3, 36(a0)
6331 ; RV32ZVE32F-NEXT: sw t6, 40(a0)
6332 ; RV32ZVE32F-NEXT: sw t5, 44(a0)
6333 ; RV32ZVE32F-NEXT: sw s1, 48(a0)
6334 ; RV32ZVE32F-NEXT: sw s0, 52(a0)
6335 ; RV32ZVE32F-NEXT: sw a2, 56(a0)
6336 ; RV32ZVE32F-NEXT: sw t0, 60(a0)
6337 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
6338 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
6339 ; RV32ZVE32F-NEXT: addi sp, sp, 16
6340 ; RV32ZVE32F-NEXT: ret
6342 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64:
6343 ; RV64ZVE32F: # %bb.0:
6344 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6345 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
6346 ; RV64ZVE32F-NEXT: andi a3, a5, 1
6347 ; RV64ZVE32F-NEXT: beqz a3, .LBB55_3
6348 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
6349 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6350 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
6351 ; RV64ZVE32F-NEXT: slli a3, a3, 3
6352 ; RV64ZVE32F-NEXT: add a3, a1, a3
6353 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
6354 ; RV64ZVE32F-NEXT: andi a4, a5, 2
6355 ; RV64ZVE32F-NEXT: bnez a4, .LBB55_4
6356 ; RV64ZVE32F-NEXT: .LBB55_2:
6357 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
6358 ; RV64ZVE32F-NEXT: j .LBB55_5
6359 ; RV64ZVE32F-NEXT: .LBB55_3:
6360 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
6361 ; RV64ZVE32F-NEXT: andi a4, a5, 2
6362 ; RV64ZVE32F-NEXT: beqz a4, .LBB55_2
6363 ; RV64ZVE32F-NEXT: .LBB55_4: # %cond.load1
6364 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6365 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6366 ; RV64ZVE32F-NEXT: vmv.x.s a4, v10
6367 ; RV64ZVE32F-NEXT: slli a4, a4, 3
6368 ; RV64ZVE32F-NEXT: add a4, a1, a4
6369 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
6370 ; RV64ZVE32F-NEXT: .LBB55_5: # %else2
6371 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
6372 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6373 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
6374 ; RV64ZVE32F-NEXT: andi a6, a5, 4
6375 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
6376 ; RV64ZVE32F-NEXT: beqz a6, .LBB55_10
6377 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
6378 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
6379 ; RV64ZVE32F-NEXT: slli a6, a6, 3
6380 ; RV64ZVE32F-NEXT: add a6, a1, a6
6381 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
6382 ; RV64ZVE32F-NEXT: andi a7, a5, 8
6383 ; RV64ZVE32F-NEXT: bnez a7, .LBB55_11
6384 ; RV64ZVE32F-NEXT: .LBB55_7:
6385 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
6386 ; RV64ZVE32F-NEXT: andi t0, a5, 16
6387 ; RV64ZVE32F-NEXT: bnez t0, .LBB55_12
6388 ; RV64ZVE32F-NEXT: .LBB55_8:
6389 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
6390 ; RV64ZVE32F-NEXT: andi t1, a5, 32
6391 ; RV64ZVE32F-NEXT: bnez t1, .LBB55_13
6392 ; RV64ZVE32F-NEXT: .LBB55_9:
6393 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
6394 ; RV64ZVE32F-NEXT: j .LBB55_14
6395 ; RV64ZVE32F-NEXT: .LBB55_10:
6396 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
6397 ; RV64ZVE32F-NEXT: andi a7, a5, 8
6398 ; RV64ZVE32F-NEXT: beqz a7, .LBB55_7
6399 ; RV64ZVE32F-NEXT: .LBB55_11: # %cond.load7
6400 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6401 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
6402 ; RV64ZVE32F-NEXT: slli a7, a7, 3
6403 ; RV64ZVE32F-NEXT: add a7, a1, a7
6404 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
6405 ; RV64ZVE32F-NEXT: andi t0, a5, 16
6406 ; RV64ZVE32F-NEXT: beqz t0, .LBB55_8
6407 ; RV64ZVE32F-NEXT: .LBB55_12: # %cond.load10
6408 ; RV64ZVE32F-NEXT: vmv.x.s t0, v10
6409 ; RV64ZVE32F-NEXT: slli t0, t0, 3
6410 ; RV64ZVE32F-NEXT: add t0, a1, t0
6411 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
6412 ; RV64ZVE32F-NEXT: andi t1, a5, 32
6413 ; RV64ZVE32F-NEXT: beqz t1, .LBB55_9
6414 ; RV64ZVE32F-NEXT: .LBB55_13: # %cond.load13
6415 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
6416 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
6417 ; RV64ZVE32F-NEXT: slli t1, t1, 3
6418 ; RV64ZVE32F-NEXT: add t1, a1, t1
6419 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
6420 ; RV64ZVE32F-NEXT: .LBB55_14: # %else14
6421 ; RV64ZVE32F-NEXT: andi t2, a5, 64
6422 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
6423 ; RV64ZVE32F-NEXT: beqz t2, .LBB55_17
6424 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
6425 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
6426 ; RV64ZVE32F-NEXT: slli t2, t2, 3
6427 ; RV64ZVE32F-NEXT: add t2, a1, t2
6428 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
6429 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6430 ; RV64ZVE32F-NEXT: bnez a5, .LBB55_18
6431 ; RV64ZVE32F-NEXT: .LBB55_16:
6432 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
6433 ; RV64ZVE32F-NEXT: j .LBB55_19
6434 ; RV64ZVE32F-NEXT: .LBB55_17:
6435 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
6436 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6437 ; RV64ZVE32F-NEXT: beqz a5, .LBB55_16
6438 ; RV64ZVE32F-NEXT: .LBB55_18: # %cond.load19
6439 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6440 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
6441 ; RV64ZVE32F-NEXT: slli a2, a2, 3
6442 ; RV64ZVE32F-NEXT: add a1, a1, a2
6443 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
6444 ; RV64ZVE32F-NEXT: .LBB55_19: # %else20
6445 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
6446 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
6447 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
6448 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
6449 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
6450 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
6451 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
6452 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
6453 ; RV64ZVE32F-NEXT: ret
6454 %eidxs = sext <8 x i32> %idxs to <8 x i64>
6455 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
6456 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
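; Zero-extended i32 indices. RV64V uses vzext.vf2, and RV64ZVE32F folds the zero-extension and the scale-by-8 into an slli-by-32 / srli-by-29 pair on each index.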
6460 define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
6461 ; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6463 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6464 ; RV32V-NEXT: vsll.vi v8, v8, 3
6465 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
6466 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
6467 ; RV32V-NEXT: vmv.v.v v8, v12
6470 ; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6472 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
6473 ; RV64V-NEXT: vzext.vf2 v16, v8
6474 ; RV64V-NEXT: vsll.vi v8, v16, 3
6475 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
6476 ; RV64V-NEXT: vmv.v.v v8, v12
6479 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6480 ; RV32ZVE32F: # %bb.0:
6481 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6482 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
6483 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
6484 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6485 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
6486 ; RV32ZVE32F-NEXT: andi a1, t0, 1
6487 ; RV32ZVE32F-NEXT: beqz a1, .LBB56_7
6488 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
6489 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6490 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
6491 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
6492 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
6493 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6494 ; RV32ZVE32F-NEXT: bnez a4, .LBB56_8
6495 ; RV32ZVE32F-NEXT: .LBB56_2:
6496 ; RV32ZVE32F-NEXT: lw a4, 12(a2)
6497 ; RV32ZVE32F-NEXT: lw a5, 8(a2)
6498 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6499 ; RV32ZVE32F-NEXT: bnez a6, .LBB56_9
6500 ; RV32ZVE32F-NEXT: .LBB56_3:
6501 ; RV32ZVE32F-NEXT: lw a6, 20(a2)
6502 ; RV32ZVE32F-NEXT: lw a7, 16(a2)
6503 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6504 ; RV32ZVE32F-NEXT: bnez t1, .LBB56_10
6505 ; RV32ZVE32F-NEXT: .LBB56_4:
6506 ; RV32ZVE32F-NEXT: lw t1, 28(a2)
6507 ; RV32ZVE32F-NEXT: lw t2, 24(a2)
6508 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6509 ; RV32ZVE32F-NEXT: bnez t3, .LBB56_11
6510 ; RV32ZVE32F-NEXT: .LBB56_5:
6511 ; RV32ZVE32F-NEXT: lw t3, 36(a2)
6512 ; RV32ZVE32F-NEXT: lw t4, 32(a2)
6513 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6514 ; RV32ZVE32F-NEXT: bnez t5, .LBB56_12
6515 ; RV32ZVE32F-NEXT: .LBB56_6:
6516 ; RV32ZVE32F-NEXT: lw t5, 44(a2)
6517 ; RV32ZVE32F-NEXT: lw t6, 40(a2)
6518 ; RV32ZVE32F-NEXT: j .LBB56_13
6519 ; RV32ZVE32F-NEXT: .LBB56_7:
6520 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
6521 ; RV32ZVE32F-NEXT: lw a3, 0(a2)
6522 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6523 ; RV32ZVE32F-NEXT: beqz a4, .LBB56_2
6524 ; RV32ZVE32F-NEXT: .LBB56_8: # %cond.load1
6525 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6526 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6527 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
6528 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
6529 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
6530 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6531 ; RV32ZVE32F-NEXT: beqz a6, .LBB56_3
6532 ; RV32ZVE32F-NEXT: .LBB56_9: # %cond.load4
6533 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6534 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
6535 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
6536 ; RV32ZVE32F-NEXT: lw a6, 4(a7)
6537 ; RV32ZVE32F-NEXT: lw a7, 0(a7)
6538 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6539 ; RV32ZVE32F-NEXT: beqz t1, .LBB56_4
6540 ; RV32ZVE32F-NEXT: .LBB56_10: # %cond.load7
6541 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6542 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
6543 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
6544 ; RV32ZVE32F-NEXT: lw t1, 4(t2)
6545 ; RV32ZVE32F-NEXT: lw t2, 0(t2)
6546 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6547 ; RV32ZVE32F-NEXT: beqz t3, .LBB56_5
6548 ; RV32ZVE32F-NEXT: .LBB56_11: # %cond.load10
6549 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6550 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6551 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
6552 ; RV32ZVE32F-NEXT: lw t3, 4(t4)
6553 ; RV32ZVE32F-NEXT: lw t4, 0(t4)
6554 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6555 ; RV32ZVE32F-NEXT: beqz t5, .LBB56_6
6556 ; RV32ZVE32F-NEXT: .LBB56_12: # %cond.load13
6557 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6558 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
6559 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
6560 ; RV32ZVE32F-NEXT: lw t5, 4(t6)
6561 ; RV32ZVE32F-NEXT: lw t6, 0(t6)
6562 ; RV32ZVE32F-NEXT: .LBB56_13: # %else14
6563 ; RV32ZVE32F-NEXT: addi sp, sp, -16
6564 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
6565 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
6566 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
6567 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
6568 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
6569 ; RV32ZVE32F-NEXT: andi s0, t0, 64
6570 ; RV32ZVE32F-NEXT: beqz s0, .LBB56_16
6571 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
6572 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6573 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6574 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
6575 ; RV32ZVE32F-NEXT: lw s0, 4(s1)
6576 ; RV32ZVE32F-NEXT: lw s1, 0(s1)
6577 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6578 ; RV32ZVE32F-NEXT: bnez t0, .LBB56_17
6579 ; RV32ZVE32F-NEXT: .LBB56_15:
6580 ; RV32ZVE32F-NEXT: lw t0, 60(a2)
6581 ; RV32ZVE32F-NEXT: lw a2, 56(a2)
6582 ; RV32ZVE32F-NEXT: j .LBB56_18
6583 ; RV32ZVE32F-NEXT: .LBB56_16:
6584 ; RV32ZVE32F-NEXT: lw s0, 52(a2)
6585 ; RV32ZVE32F-NEXT: lw s1, 48(a2)
6586 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6587 ; RV32ZVE32F-NEXT: beqz t0, .LBB56_15
6588 ; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load19
6589 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6590 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6591 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
6592 ; RV32ZVE32F-NEXT: lw t0, 4(a2)
6593 ; RV32ZVE32F-NEXT: lw a2, 0(a2)
6594 ; RV32ZVE32F-NEXT: .LBB56_18: # %else20
6595 ; RV32ZVE32F-NEXT: sw a3, 0(a0)
6596 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
6597 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
6598 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
6599 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
6600 ; RV32ZVE32F-NEXT: sw a6, 20(a0)
6601 ; RV32ZVE32F-NEXT: sw t2, 24(a0)
6602 ; RV32ZVE32F-NEXT: sw t1, 28(a0)
6603 ; RV32ZVE32F-NEXT: sw t4, 32(a0)
6604 ; RV32ZVE32F-NEXT: sw t3, 36(a0)
6605 ; RV32ZVE32F-NEXT: sw t6, 40(a0)
6606 ; RV32ZVE32F-NEXT: sw t5, 44(a0)
6607 ; RV32ZVE32F-NEXT: sw s1, 48(a0)
6608 ; RV32ZVE32F-NEXT: sw s0, 52(a0)
6609 ; RV32ZVE32F-NEXT: sw a2, 56(a0)
6610 ; RV32ZVE32F-NEXT: sw t0, 60(a0)
6611 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
6612 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
6613 ; RV32ZVE32F-NEXT: addi sp, sp, 16
6614 ; RV32ZVE32F-NEXT: ret
6616 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64:
6617 ; RV64ZVE32F: # %bb.0:
6618 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6619 ; RV64ZVE32F-NEXT: vmv.x.s a5, v0
6620 ; RV64ZVE32F-NEXT: andi a3, a5, 1
6621 ; RV64ZVE32F-NEXT: beqz a3, .LBB56_3
6622 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
6623 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6624 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
6625 ; RV64ZVE32F-NEXT: slli a3, a3, 32
6626 ; RV64ZVE32F-NEXT: srli a3, a3, 29
6627 ; RV64ZVE32F-NEXT: add a3, a1, a3
6628 ; RV64ZVE32F-NEXT: ld a3, 0(a3)
6629 ; RV64ZVE32F-NEXT: andi a4, a5, 2
6630 ; RV64ZVE32F-NEXT: bnez a4, .LBB56_4
6631 ; RV64ZVE32F-NEXT: .LBB56_2:
6632 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
6633 ; RV64ZVE32F-NEXT: j .LBB56_5
6634 ; RV64ZVE32F-NEXT: .LBB56_3:
6635 ; RV64ZVE32F-NEXT: ld a3, 0(a2)
6636 ; RV64ZVE32F-NEXT: andi a4, a5, 2
6637 ; RV64ZVE32F-NEXT: beqz a4, .LBB56_2
6638 ; RV64ZVE32F-NEXT: .LBB56_4: # %cond.load1
6639 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6640 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6641 ; RV64ZVE32F-NEXT: vmv.x.s a4, v10
6642 ; RV64ZVE32F-NEXT: slli a4, a4, 32
6643 ; RV64ZVE32F-NEXT: srli a4, a4, 29
6644 ; RV64ZVE32F-NEXT: add a4, a1, a4
6645 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
6646 ; RV64ZVE32F-NEXT: .LBB56_5: # %else2
6647 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
6648 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6649 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
6650 ; RV64ZVE32F-NEXT: andi a6, a5, 4
6651 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
6652 ; RV64ZVE32F-NEXT: beqz a6, .LBB56_10
6653 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
6654 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8
6655 ; RV64ZVE32F-NEXT: slli a6, a6, 32
6656 ; RV64ZVE32F-NEXT: srli a6, a6, 29
6657 ; RV64ZVE32F-NEXT: add a6, a1, a6
6658 ; RV64ZVE32F-NEXT: ld a6, 0(a6)
6659 ; RV64ZVE32F-NEXT: andi a7, a5, 8
6660 ; RV64ZVE32F-NEXT: bnez a7, .LBB56_11
6661 ; RV64ZVE32F-NEXT: .LBB56_7:
6662 ; RV64ZVE32F-NEXT: ld a7, 24(a2)
6663 ; RV64ZVE32F-NEXT: andi t0, a5, 16
6664 ; RV64ZVE32F-NEXT: bnez t0, .LBB56_12
6665 ; RV64ZVE32F-NEXT: .LBB56_8:
6666 ; RV64ZVE32F-NEXT: ld t0, 32(a2)
6667 ; RV64ZVE32F-NEXT: andi t1, a5, 32
6668 ; RV64ZVE32F-NEXT: bnez t1, .LBB56_13
6669 ; RV64ZVE32F-NEXT: .LBB56_9:
6670 ; RV64ZVE32F-NEXT: ld t1, 40(a2)
6671 ; RV64ZVE32F-NEXT: j .LBB56_14
6672 ; RV64ZVE32F-NEXT: .LBB56_10:
6673 ; RV64ZVE32F-NEXT: ld a6, 16(a2)
6674 ; RV64ZVE32F-NEXT: andi a7, a5, 8
6675 ; RV64ZVE32F-NEXT: beqz a7, .LBB56_7
6676 ; RV64ZVE32F-NEXT: .LBB56_11: # %cond.load7
6677 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6678 ; RV64ZVE32F-NEXT: vmv.x.s a7, v8
6679 ; RV64ZVE32F-NEXT: slli a7, a7, 32
6680 ; RV64ZVE32F-NEXT: srli a7, a7, 29
6681 ; RV64ZVE32F-NEXT: add a7, a1, a7
6682 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
6683 ; RV64ZVE32F-NEXT: andi t0, a5, 16
6684 ; RV64ZVE32F-NEXT: beqz t0, .LBB56_8
6685 ; RV64ZVE32F-NEXT: .LBB56_12: # %cond.load10
6686 ; RV64ZVE32F-NEXT: vmv.x.s t0, v10
6687 ; RV64ZVE32F-NEXT: slli t0, t0, 32
6688 ; RV64ZVE32F-NEXT: srli t0, t0, 29
6689 ; RV64ZVE32F-NEXT: add t0, a1, t0
6690 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
6691 ; RV64ZVE32F-NEXT: andi t1, a5, 32
6692 ; RV64ZVE32F-NEXT: beqz t1, .LBB56_9
6693 ; RV64ZVE32F-NEXT: .LBB56_13: # %cond.load13
6694 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
6695 ; RV64ZVE32F-NEXT: vmv.x.s t1, v8
6696 ; RV64ZVE32F-NEXT: slli t1, t1, 32
6697 ; RV64ZVE32F-NEXT: srli t1, t1, 29
6698 ; RV64ZVE32F-NEXT: add t1, a1, t1
6699 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
6700 ; RV64ZVE32F-NEXT: .LBB56_14: # %else14
6701 ; RV64ZVE32F-NEXT: andi t2, a5, 64
6702 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
6703 ; RV64ZVE32F-NEXT: beqz t2, .LBB56_17
6704 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
6705 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8
6706 ; RV64ZVE32F-NEXT: slli t2, t2, 32
6707 ; RV64ZVE32F-NEXT: srli t2, t2, 29
6708 ; RV64ZVE32F-NEXT: add t2, a1, t2
6709 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
6710 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6711 ; RV64ZVE32F-NEXT: bnez a5, .LBB56_18
6712 ; RV64ZVE32F-NEXT: .LBB56_16:
6713 ; RV64ZVE32F-NEXT: ld a1, 56(a2)
6714 ; RV64ZVE32F-NEXT: j .LBB56_19
6715 ; RV64ZVE32F-NEXT: .LBB56_17:
6716 ; RV64ZVE32F-NEXT: ld t2, 48(a2)
6717 ; RV64ZVE32F-NEXT: andi a5, a5, -128
6718 ; RV64ZVE32F-NEXT: beqz a5, .LBB56_16
6719 ; RV64ZVE32F-NEXT: .LBB56_18: # %cond.load19
6720 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
6721 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
6722 ; RV64ZVE32F-NEXT: slli a2, a2, 32
6723 ; RV64ZVE32F-NEXT: srli a2, a2, 29
6724 ; RV64ZVE32F-NEXT: add a1, a1, a2
6725 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
6726 ; RV64ZVE32F-NEXT: .LBB56_19: # %else20
6727 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
6728 ; RV64ZVE32F-NEXT: sd a4, 8(a0)
6729 ; RV64ZVE32F-NEXT: sd a6, 16(a0)
6730 ; RV64ZVE32F-NEXT: sd a7, 24(a0)
6731 ; RV64ZVE32F-NEXT: sd t0, 32(a0)
6732 ; RV64ZVE32F-NEXT: sd t1, 40(a0)
6733 ; RV64ZVE32F-NEXT: sd t2, 48(a0)
6734 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
6735 ; RV64ZVE32F-NEXT: ret
6736 %eidxs = zext <8 x i32> %idxs to <8 x i64>
6737 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
6738 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
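; Gather indexed by native i64 values. RV32V narrows the indices with vnsrl.wi to form 32-bit offsets, RV32ZVE32F rebuilds a 32-bit index vector from scalar loads with vslide1down.vx, and RV64ZVE32F is fully scalar.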
6742 define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
6743 ; RV32V-LABEL: mgather_baseidx_v8i64:
6745 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6746 ; RV32V-NEXT: vnsrl.wi v16, v8, 0
6747 ; RV32V-NEXT: vsll.vi v8, v16, 3
6748 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
6749 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
6750 ; RV32V-NEXT: vmv.v.v v8, v12
6753 ; RV64V-LABEL: mgather_baseidx_v8i64:
6755 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
6756 ; RV64V-NEXT: vsll.vi v8, v8, 3
6757 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
6758 ; RV64V-NEXT: vmv.v.v v8, v12
6761 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i64:
6762 ; RV32ZVE32F: # %bb.0:
6763 ; RV32ZVE32F-NEXT: lw a4, 56(a2)
6764 ; RV32ZVE32F-NEXT: lw a5, 48(a2)
6765 ; RV32ZVE32F-NEXT: lw a6, 40(a2)
6766 ; RV32ZVE32F-NEXT: lw a7, 32(a2)
6767 ; RV32ZVE32F-NEXT: lw t0, 24(a2)
6768 ; RV32ZVE32F-NEXT: lw t1, 0(a2)
6769 ; RV32ZVE32F-NEXT: lw t2, 8(a2)
6770 ; RV32ZVE32F-NEXT: lw a2, 16(a2)
6771 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
6772 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
6773 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2
6774 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
6775 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
6776 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
6777 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
6778 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
6779 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
6780 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
6781 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
6782 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6783 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0
6784 ; RV32ZVE32F-NEXT: andi a1, t0, 1
6785 ; RV32ZVE32F-NEXT: beqz a1, .LBB57_7
6786 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
6787 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
6788 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
6789 ; RV32ZVE32F-NEXT: lw a1, 4(a2)
6790 ; RV32ZVE32F-NEXT: lw a2, 0(a2)
6791 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6792 ; RV32ZVE32F-NEXT: bnez a4, .LBB57_8
6793 ; RV32ZVE32F-NEXT: .LBB57_2:
6794 ; RV32ZVE32F-NEXT: lw a4, 12(a3)
6795 ; RV32ZVE32F-NEXT: lw a5, 8(a3)
6796 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6797 ; RV32ZVE32F-NEXT: bnez a6, .LBB57_9
6798 ; RV32ZVE32F-NEXT: .LBB57_3:
6799 ; RV32ZVE32F-NEXT: lw a6, 20(a3)
6800 ; RV32ZVE32F-NEXT: lw a7, 16(a3)
6801 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6802 ; RV32ZVE32F-NEXT: bnez t1, .LBB57_10
6803 ; RV32ZVE32F-NEXT: .LBB57_4:
6804 ; RV32ZVE32F-NEXT: lw t1, 28(a3)
6805 ; RV32ZVE32F-NEXT: lw t2, 24(a3)
6806 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6807 ; RV32ZVE32F-NEXT: bnez t3, .LBB57_11
6808 ; RV32ZVE32F-NEXT: .LBB57_5:
6809 ; RV32ZVE32F-NEXT: lw t3, 36(a3)
6810 ; RV32ZVE32F-NEXT: lw t4, 32(a3)
6811 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6812 ; RV32ZVE32F-NEXT: bnez t5, .LBB57_12
6813 ; RV32ZVE32F-NEXT: .LBB57_6:
6814 ; RV32ZVE32F-NEXT: lw t5, 44(a3)
6815 ; RV32ZVE32F-NEXT: lw t6, 40(a3)
6816 ; RV32ZVE32F-NEXT: j .LBB57_13
6817 ; RV32ZVE32F-NEXT: .LBB57_7:
6818 ; RV32ZVE32F-NEXT: lw a1, 4(a3)
6819 ; RV32ZVE32F-NEXT: lw a2, 0(a3)
6820 ; RV32ZVE32F-NEXT: andi a4, t0, 2
6821 ; RV32ZVE32F-NEXT: beqz a4, .LBB57_2
6822 ; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load1
6823 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6824 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
6825 ; RV32ZVE32F-NEXT: vmv.x.s a5, v10
6826 ; RV32ZVE32F-NEXT: lw a4, 4(a5)
6827 ; RV32ZVE32F-NEXT: lw a5, 0(a5)
6828 ; RV32ZVE32F-NEXT: andi a6, t0, 4
6829 ; RV32ZVE32F-NEXT: beqz a6, .LBB57_3
6830 ; RV32ZVE32F-NEXT: .LBB57_9: # %cond.load4
6831 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6832 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
6833 ; RV32ZVE32F-NEXT: vmv.x.s a7, v10
6834 ; RV32ZVE32F-NEXT: lw a6, 4(a7)
6835 ; RV32ZVE32F-NEXT: lw a7, 0(a7)
6836 ; RV32ZVE32F-NEXT: andi t1, t0, 8
6837 ; RV32ZVE32F-NEXT: beqz t1, .LBB57_4
6838 ; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load7
6839 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
6840 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
6841 ; RV32ZVE32F-NEXT: vmv.x.s t2, v10
6842 ; RV32ZVE32F-NEXT: lw t1, 4(t2)
6843 ; RV32ZVE32F-NEXT: lw t2, 0(t2)
6844 ; RV32ZVE32F-NEXT: andi t3, t0, 16
6845 ; RV32ZVE32F-NEXT: beqz t3, .LBB57_5
6846 ; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load10
6847 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6848 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
6849 ; RV32ZVE32F-NEXT: vmv.x.s t4, v10
6850 ; RV32ZVE32F-NEXT: lw t3, 4(t4)
6851 ; RV32ZVE32F-NEXT: lw t4, 0(t4)
6852 ; RV32ZVE32F-NEXT: andi t5, t0, 32
6853 ; RV32ZVE32F-NEXT: beqz t5, .LBB57_6
6854 ; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load13
6855 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6856 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
6857 ; RV32ZVE32F-NEXT: vmv.x.s t6, v10
6858 ; RV32ZVE32F-NEXT: lw t5, 4(t6)
6859 ; RV32ZVE32F-NEXT: lw t6, 0(t6)
6860 ; RV32ZVE32F-NEXT: .LBB57_13: # %else14
6861 ; RV32ZVE32F-NEXT: addi sp, sp, -16
6862 ; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
6863 ; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
6864 ; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
6865 ; RV32ZVE32F-NEXT: .cfi_offset s0, -4
6866 ; RV32ZVE32F-NEXT: .cfi_offset s1, -8
6867 ; RV32ZVE32F-NEXT: andi s0, t0, 64
6868 ; RV32ZVE32F-NEXT: beqz s0, .LBB57_16
6869 ; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
6870 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6871 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
6872 ; RV32ZVE32F-NEXT: vmv.x.s s1, v10
6873 ; RV32ZVE32F-NEXT: lw s0, 4(s1)
6874 ; RV32ZVE32F-NEXT: lw s1, 0(s1)
6875 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6876 ; RV32ZVE32F-NEXT: bnez t0, .LBB57_17
6877 ; RV32ZVE32F-NEXT: .LBB57_15:
6878 ; RV32ZVE32F-NEXT: lw t0, 60(a3)
6879 ; RV32ZVE32F-NEXT: lw a3, 56(a3)
6880 ; RV32ZVE32F-NEXT: j .LBB57_18
6881 ; RV32ZVE32F-NEXT: .LBB57_16:
6882 ; RV32ZVE32F-NEXT: lw s0, 52(a3)
6883 ; RV32ZVE32F-NEXT: lw s1, 48(a3)
6884 ; RV32ZVE32F-NEXT: andi t0, t0, -128
6885 ; RV32ZVE32F-NEXT: beqz t0, .LBB57_15
6886 ; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load19
6887 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
6888 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
6889 ; RV32ZVE32F-NEXT: vmv.x.s a3, v8
6890 ; RV32ZVE32F-NEXT: lw t0, 4(a3)
6891 ; RV32ZVE32F-NEXT: lw a3, 0(a3)
6892 ; RV32ZVE32F-NEXT: .LBB57_18: # %else20
6893 ; RV32ZVE32F-NEXT: sw a2, 0(a0)
6894 ; RV32ZVE32F-NEXT: sw a1, 4(a0)
6895 ; RV32ZVE32F-NEXT: sw a5, 8(a0)
6896 ; RV32ZVE32F-NEXT: sw a4, 12(a0)
6897 ; RV32ZVE32F-NEXT: sw a7, 16(a0)
6898 ; RV32ZVE32F-NEXT: sw a6, 20(a0)
6899 ; RV32ZVE32F-NEXT: sw t2, 24(a0)
6900 ; RV32ZVE32F-NEXT: sw t1, 28(a0)
6901 ; RV32ZVE32F-NEXT: sw t4, 32(a0)
6902 ; RV32ZVE32F-NEXT: sw t3, 36(a0)
6903 ; RV32ZVE32F-NEXT: sw t6, 40(a0)
6904 ; RV32ZVE32F-NEXT: sw t5, 44(a0)
6905 ; RV32ZVE32F-NEXT: sw s1, 48(a0)
6906 ; RV32ZVE32F-NEXT: sw s0, 52(a0)
6907 ; RV32ZVE32F-NEXT: sw a3, 56(a0)
6908 ; RV32ZVE32F-NEXT: sw t0, 60(a0)
6909 ; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
6910 ; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
6911 ; RV32ZVE32F-NEXT: addi sp, sp, 16
6912 ; RV32ZVE32F-NEXT: ret
6914 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i64:
6915 ; RV64ZVE32F: # %bb.0:
6916 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
6917 ; RV64ZVE32F-NEXT: vmv.x.s a6, v0
6918 ; RV64ZVE32F-NEXT: andi a4, a6, 1
6919 ; RV64ZVE32F-NEXT: beqz a4, .LBB57_9
6920 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
6921 ; RV64ZVE32F-NEXT: ld a4, 0(a2)
6922 ; RV64ZVE32F-NEXT: slli a4, a4, 3
6923 ; RV64ZVE32F-NEXT: add a4, a1, a4
6924 ; RV64ZVE32F-NEXT: ld a4, 0(a4)
6925 ; RV64ZVE32F-NEXT: andi a5, a6, 2
6926 ; RV64ZVE32F-NEXT: bnez a5, .LBB57_10
6927 ; RV64ZVE32F-NEXT: .LBB57_2:
6928 ; RV64ZVE32F-NEXT: ld a5, 8(a3)
6929 ; RV64ZVE32F-NEXT: andi a7, a6, 4
6930 ; RV64ZVE32F-NEXT: bnez a7, .LBB57_11
6931 ; RV64ZVE32F-NEXT: .LBB57_3:
6932 ; RV64ZVE32F-NEXT: ld a7, 16(a3)
6933 ; RV64ZVE32F-NEXT: andi t0, a6, 8
6934 ; RV64ZVE32F-NEXT: bnez t0, .LBB57_12
6935 ; RV64ZVE32F-NEXT: .LBB57_4:
6936 ; RV64ZVE32F-NEXT: ld t0, 24(a3)
6937 ; RV64ZVE32F-NEXT: andi t1, a6, 16
6938 ; RV64ZVE32F-NEXT: bnez t1, .LBB57_13
6939 ; RV64ZVE32F-NEXT: .LBB57_5:
6940 ; RV64ZVE32F-NEXT: ld t1, 32(a3)
6941 ; RV64ZVE32F-NEXT: andi t2, a6, 32
6942 ; RV64ZVE32F-NEXT: bnez t2, .LBB57_14
6943 ; RV64ZVE32F-NEXT: .LBB57_6:
6944 ; RV64ZVE32F-NEXT: ld t2, 40(a3)
6945 ; RV64ZVE32F-NEXT: andi t3, a6, 64
6946 ; RV64ZVE32F-NEXT: bnez t3, .LBB57_15
6947 ; RV64ZVE32F-NEXT: .LBB57_7:
6948 ; RV64ZVE32F-NEXT: ld t3, 48(a3)
6949 ; RV64ZVE32F-NEXT: andi a6, a6, -128
6950 ; RV64ZVE32F-NEXT: bnez a6, .LBB57_16
6951 ; RV64ZVE32F-NEXT: .LBB57_8:
6952 ; RV64ZVE32F-NEXT: ld a1, 56(a3)
6953 ; RV64ZVE32F-NEXT: j .LBB57_17
6954 ; RV64ZVE32F-NEXT: .LBB57_9:
6955 ; RV64ZVE32F-NEXT: ld a4, 0(a3)
6956 ; RV64ZVE32F-NEXT: andi a5, a6, 2
6957 ; RV64ZVE32F-NEXT: beqz a5, .LBB57_2
6958 ; RV64ZVE32F-NEXT: .LBB57_10: # %cond.load1
6959 ; RV64ZVE32F-NEXT: ld a5, 8(a2)
6960 ; RV64ZVE32F-NEXT: slli a5, a5, 3
6961 ; RV64ZVE32F-NEXT: add a5, a1, a5
6962 ; RV64ZVE32F-NEXT: ld a5, 0(a5)
6963 ; RV64ZVE32F-NEXT: andi a7, a6, 4
6964 ; RV64ZVE32F-NEXT: beqz a7, .LBB57_3
6965 ; RV64ZVE32F-NEXT: .LBB57_11: # %cond.load4
6966 ; RV64ZVE32F-NEXT: ld a7, 16(a2)
6967 ; RV64ZVE32F-NEXT: slli a7, a7, 3
6968 ; RV64ZVE32F-NEXT: add a7, a1, a7
6969 ; RV64ZVE32F-NEXT: ld a7, 0(a7)
6970 ; RV64ZVE32F-NEXT: andi t0, a6, 8
6971 ; RV64ZVE32F-NEXT: beqz t0, .LBB57_4
6972 ; RV64ZVE32F-NEXT: .LBB57_12: # %cond.load7
6973 ; RV64ZVE32F-NEXT: ld t0, 24(a2)
6974 ; RV64ZVE32F-NEXT: slli t0, t0, 3
6975 ; RV64ZVE32F-NEXT: add t0, a1, t0
6976 ; RV64ZVE32F-NEXT: ld t0, 0(t0)
6977 ; RV64ZVE32F-NEXT: andi t1, a6, 16
6978 ; RV64ZVE32F-NEXT: beqz t1, .LBB57_5
6979 ; RV64ZVE32F-NEXT: .LBB57_13: # %cond.load10
6980 ; RV64ZVE32F-NEXT: ld t1, 32(a2)
6981 ; RV64ZVE32F-NEXT: slli t1, t1, 3
6982 ; RV64ZVE32F-NEXT: add t1, a1, t1
6983 ; RV64ZVE32F-NEXT: ld t1, 0(t1)
6984 ; RV64ZVE32F-NEXT: andi t2, a6, 32
6985 ; RV64ZVE32F-NEXT: beqz t2, .LBB57_6
6986 ; RV64ZVE32F-NEXT: .LBB57_14: # %cond.load13
6987 ; RV64ZVE32F-NEXT: ld t2, 40(a2)
6988 ; RV64ZVE32F-NEXT: slli t2, t2, 3
6989 ; RV64ZVE32F-NEXT: add t2, a1, t2
6990 ; RV64ZVE32F-NEXT: ld t2, 0(t2)
6991 ; RV64ZVE32F-NEXT: andi t3, a6, 64
6992 ; RV64ZVE32F-NEXT: beqz t3, .LBB57_7
6993 ; RV64ZVE32F-NEXT: .LBB57_15: # %cond.load16
6994 ; RV64ZVE32F-NEXT: ld t3, 48(a2)
6995 ; RV64ZVE32F-NEXT: slli t3, t3, 3
6996 ; RV64ZVE32F-NEXT: add t3, a1, t3
6997 ; RV64ZVE32F-NEXT: ld t3, 0(t3)
6998 ; RV64ZVE32F-NEXT: andi a6, a6, -128
6999 ; RV64ZVE32F-NEXT: beqz a6, .LBB57_8
7000 ; RV64ZVE32F-NEXT: .LBB57_16: # %cond.load19
7001 ; RV64ZVE32F-NEXT: ld a2, 56(a2)
7002 ; RV64ZVE32F-NEXT: slli a2, a2, 3
7003 ; RV64ZVE32F-NEXT: add a1, a1, a2
7004 ; RV64ZVE32F-NEXT: ld a1, 0(a1)
7005 ; RV64ZVE32F-NEXT: .LBB57_17: # %else20
7006 ; RV64ZVE32F-NEXT: sd a4, 0(a0)
7007 ; RV64ZVE32F-NEXT: sd a5, 8(a0)
7008 ; RV64ZVE32F-NEXT: sd a7, 16(a0)
7009 ; RV64ZVE32F-NEXT: sd t0, 24(a0)
7010 ; RV64ZVE32F-NEXT: sd t1, 32(a0)
7011 ; RV64ZVE32F-NEXT: sd t2, 40(a0)
7012 ; RV64ZVE32F-NEXT: sd t3, 48(a0)
7013 ; RV64ZVE32F-NEXT: sd a1, 56(a0)
7014 ; RV64ZVE32F-NEXT: ret
7015 %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
7016 %v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
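; Half-precision gathers follow. With +zvfh the vector configurations use masked indexed loads (vluxei32/vluxei64), while RV64ZVE32F scalarizes since Zve32f vectors cannot hold 64-bit pointer elements.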
7020 declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>)
7022 define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) {
7023 ; RV32V-LABEL: mgather_v1f16:
7025 ; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
7026 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
7027 ; RV32V-NEXT: vmv1r.v v8, v9
7030 ; RV64V-LABEL: mgather_v1f16:
7032 ; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
7033 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
7034 ; RV64V-NEXT: vmv1r.v v8, v9
7037 ; RV32ZVE32F-LABEL: mgather_v1f16:
7038 ; RV32ZVE32F: # %bb.0:
7039 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
7040 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
7041 ; RV32ZVE32F-NEXT: vmv1r.v v8, v9
7042 ; RV32ZVE32F-NEXT: ret
7044 ; RV64ZVE32F-LABEL: mgather_v1f16:
7045 ; RV64ZVE32F: # %bb.0:
7046 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
7047 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
7048 ; RV64ZVE32F-NEXT: bnez a1, .LBB58_2
7049 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
7050 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7051 ; RV64ZVE32F-NEXT: vle16.v v8, (a0)
7052 ; RV64ZVE32F-NEXT: .LBB58_2: # %else
7053 ; RV64ZVE32F-NEXT: ret
7054 %v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
7058 declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>)
7060 define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) {
7061 ; RV32V-LABEL: mgather_v2f16:
7063 ; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
7064 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
7065 ; RV32V-NEXT: vmv1r.v v8, v9
7068 ; RV64V-LABEL: mgather_v2f16:
7070 ; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
7071 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
7072 ; RV64V-NEXT: vmv1r.v v8, v9
7075 ; RV32ZVE32F-LABEL: mgather_v2f16:
7076 ; RV32ZVE32F: # %bb.0:
7077 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
7078 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
7079 ; RV32ZVE32F-NEXT: vmv1r.v v8, v9
7080 ; RV32ZVE32F-NEXT: ret
7082 ; RV64ZVE32F-LABEL: mgather_v2f16:
7083 ; RV64ZVE32F: # %bb.0:
7084 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7085 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
7086 ; RV64ZVE32F-NEXT: andi a3, a2, 1
7087 ; RV64ZVE32F-NEXT: bnez a3, .LBB59_3
7088 ; RV64ZVE32F-NEXT: # %bb.1: # %else
7089 ; RV64ZVE32F-NEXT: andi a2, a2, 2
7090 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_4
7091 ; RV64ZVE32F-NEXT: .LBB59_2: # %else2
7092 ; RV64ZVE32F-NEXT: ret
7093 ; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load
7094 ; RV64ZVE32F-NEXT: flh fa5, 0(a0)
7095 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
7096 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7097 ; RV64ZVE32F-NEXT: andi a2, a2, 2
7098 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
7099 ; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1
7100 ; RV64ZVE32F-NEXT: flh fa5, 0(a1)
7101 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7102 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7103 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
7104 ; RV64ZVE32F-NEXT: ret
7105 %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru)
7109 declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
7111 define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
7112 ; RV32-LABEL: mgather_v4f16:
7114 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
7115 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
7116 ; RV32-NEXT: vmv1r.v v8, v9
7119 ; RV64V-LABEL: mgather_v4f16:
7121 ; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
7122 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
7123 ; RV64V-NEXT: vmv1r.v v8, v10
7126 ; RV64ZVE32F-LABEL: mgather_v4f16:
7127 ; RV64ZVE32F: # %bb.0:
7128 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7129 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7130 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7131 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_5
7132 ; RV64ZVE32F-NEXT: # %bb.1: # %else
7133 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7134 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_6
7135 ; RV64ZVE32F-NEXT: .LBB60_2: # %else2
7136 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7137 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_7
7138 ; RV64ZVE32F-NEXT: .LBB60_3: # %else5
7139 ; RV64ZVE32F-NEXT: andi a1, a1, 8
7140 ; RV64ZVE32F-NEXT: bnez a1, .LBB60_8
7141 ; RV64ZVE32F-NEXT: .LBB60_4: # %else8
7142 ; RV64ZVE32F-NEXT: ret
7143 ; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load
7144 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
7145 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7146 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
7147 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7148 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7149 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
7150 ; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1
7151 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
7152 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7153 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
7154 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7155 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
7156 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7157 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_3
7158 ; RV64ZVE32F-NEXT: .LBB60_7: # %cond.load4
7159 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
7160 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7161 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
7162 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7163 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
7164 ; RV64ZVE32F-NEXT: andi a1, a1, 8
7165 ; RV64ZVE32F-NEXT: beqz a1, .LBB60_4
7166 ; RV64ZVE32F-NEXT: .LBB60_8: # %cond.load7
7167 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
7168 ; RV64ZVE32F-NEXT: flh fa5, 0(a0)
7169 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
7170 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7171 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
7172 ; RV64ZVE32F-NEXT: ret
7173 %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru)
7177 define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
7178 ; RV32-LABEL: mgather_truemask_v4f16:
7180 ; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
7181 ; RV32-NEXT: vluxei32.v v9, (zero), v8
7182 ; RV32-NEXT: vmv1r.v v8, v9
7185 ; RV64V-LABEL: mgather_truemask_v4f16:
7187 ; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
7188 ; RV64V-NEXT: vluxei64.v v10, (zero), v8
7189 ; RV64V-NEXT: vmv1r.v v8, v10
7192 ; RV64ZVE32F-LABEL: mgather_truemask_v4f16:
7193 ; RV64ZVE32F: # %bb.0:
7194 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
7195 ; RV64ZVE32F-NEXT: vmset.m v9
7196 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
7197 ; RV64ZVE32F-NEXT: beqz zero, .LBB61_5
7198 ; RV64ZVE32F-NEXT: # %bb.1: # %else
7199 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7200 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_6
7201 ; RV64ZVE32F-NEXT: .LBB61_2: # %else2
7202 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7203 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_7
7204 ; RV64ZVE32F-NEXT: .LBB61_3: # %else5
7205 ; RV64ZVE32F-NEXT: andi a1, a1, 8
7206 ; RV64ZVE32F-NEXT: bnez a1, .LBB61_8
7207 ; RV64ZVE32F-NEXT: .LBB61_4: # %else8
7208 ; RV64ZVE32F-NEXT: ret
7209 ; RV64ZVE32F-NEXT: .LBB61_5: # %cond.load
7210 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
7211 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7212 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, tu, ma
7213 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7214 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7215 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_2
7216 ; RV64ZVE32F-NEXT: .LBB61_6: # %cond.load1
7217 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
7218 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7219 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
7220 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7221 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
7222 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
7223 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7224 ; RV64ZVE32F-NEXT: beqz a2, .LBB61_3
7225 ; RV64ZVE32F-NEXT: .LBB61_7: # %cond.load4
7226 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
7227 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7228 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
7229 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7230 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
7231 ; RV64ZVE32F-NEXT: andi a1, a1, 8
7232 ; RV64ZVE32F-NEXT: beqz a1, .LBB61_4
7233 ; RV64ZVE32F-NEXT: .LBB61_8: # %cond.load7
7234 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
7235 ; RV64ZVE32F-NEXT: flh fa5, 0(a0)
7236 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
7237 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7238 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
7239 ; RV64ZVE32F-NEXT: ret
7240 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
7241 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
7242 %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue, <4 x half> %passthru)
7246 define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
7247 ; RV32-LABEL: mgather_falsemask_v4f16:
7249 ; RV32-NEXT: vmv1r.v v8, v9
7252 ; RV64V-LABEL: mgather_falsemask_v4f16:
7254 ; RV64V-NEXT: vmv1r.v v8, v10
7257 ; RV64ZVE32F-LABEL: mgather_falsemask_v4f16:
7258 ; RV64ZVE32F: # %bb.0:
7259 ; RV64ZVE32F-NEXT: ret
7260 %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x half> %passthru)
7264 declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
7266 define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) {
7267 ; RV32-LABEL: mgather_v8f16:
7269 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
7270 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
7271 ; RV32-NEXT: vmv.v.v v8, v10
7274 ; RV64V-LABEL: mgather_v8f16:
7276 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
7277 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
7278 ; RV64V-NEXT: vmv.v.v v8, v12
7281 ; RV64ZVE32F-LABEL: mgather_v8f16:
7282 ; RV64ZVE32F: # %bb.0:
7283 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7284 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7285 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7286 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_9
7287 ; RV64ZVE32F-NEXT: # %bb.1: # %else
7288 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7289 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_10
7290 ; RV64ZVE32F-NEXT: .LBB63_2: # %else2
7291 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7292 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_11
7293 ; RV64ZVE32F-NEXT: .LBB63_3: # %else5
7294 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7295 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_12
7296 ; RV64ZVE32F-NEXT: .LBB63_4: # %else8
7297 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7298 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_13
7299 ; RV64ZVE32F-NEXT: .LBB63_5: # %else11
7300 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7301 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_14
7302 ; RV64ZVE32F-NEXT: .LBB63_6: # %else14
7303 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7304 ; RV64ZVE32F-NEXT: bnez a2, .LBB63_15
7305 ; RV64ZVE32F-NEXT: .LBB63_7: # %else17
7306 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7307 ; RV64ZVE32F-NEXT: bnez a1, .LBB63_16
7308 ; RV64ZVE32F-NEXT: .LBB63_8: # %else20
7309 ; RV64ZVE32F-NEXT: ret
7310 ; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load
7311 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
7312 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7313 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
7314 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7315 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7316 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
7317 ; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1
7318 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
7319 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7320 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
7321 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7322 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
7323 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7324 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_3
7325 ; RV64ZVE32F-NEXT: .LBB63_11: # %cond.load4
7326 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
7327 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7328 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
7329 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7330 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
7331 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7332 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_4
7333 ; RV64ZVE32F-NEXT: .LBB63_12: # %cond.load7
7334 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
7335 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7336 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
7337 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7338 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
7339 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7340 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_5
7341 ; RV64ZVE32F-NEXT: .LBB63_13: # %cond.load10
7342 ; RV64ZVE32F-NEXT: ld a2, 32(a0)
7343 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7344 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
7345 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7346 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
7347 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7348 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_6
7349 ; RV64ZVE32F-NEXT: .LBB63_14: # %cond.load13
7350 ; RV64ZVE32F-NEXT: ld a2, 40(a0)
7351 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7352 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
7353 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7354 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
7355 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7356 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_7
7357 ; RV64ZVE32F-NEXT: .LBB63_15: # %cond.load16
7358 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
7359 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7360 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
7361 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7362 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
7363 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7364 ; RV64ZVE32F-NEXT: beqz a1, .LBB63_8
7365 ; RV64ZVE32F-NEXT: .LBB63_16: # %cond.load19
7366 ; RV64ZVE32F-NEXT: ld a0, 56(a0)
7367 ; RV64ZVE32F-NEXT: flh fa5, 0(a0)
7368 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7369 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7370 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
7371 ; RV64ZVE32F-NEXT: ret
7372 %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
7376 define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
7377 ; RV32-LABEL: mgather_baseidx_v8i8_v8f16:
7379 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7380 ; RV32-NEXT: vsext.vf4 v10, v8
7381 ; RV32-NEXT: vadd.vv v10, v10, v10
7382 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7383 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
7384 ; RV32-NEXT: vmv.v.v v8, v9
7387 ; RV64V-LABEL: mgather_baseidx_v8i8_v8f16:
7389 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7390 ; RV64V-NEXT: vsext.vf8 v12, v8
7391 ; RV64V-NEXT: vadd.vv v12, v12, v12
7392 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7393 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
7394 ; RV64V-NEXT: vmv.v.v v8, v9
7397 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f16:
7398 ; RV64ZVE32F: # %bb.0:
7399 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7400 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7401 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7402 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_2
7403 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
7404 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7405 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7406 ; RV64ZVE32F-NEXT: add a2, a0, a2
7407 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7408 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
7409 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7410 ; RV64ZVE32F-NEXT: .LBB64_2: # %else
7411 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7412 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_4
7413 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
7414 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7415 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
7416 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7417 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7418 ; RV64ZVE32F-NEXT: add a2, a0, a2
7419 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7420 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7421 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
7422 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
7423 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
7424 ; RV64ZVE32F-NEXT: .LBB64_4: # %else2
7425 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7426 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
7427 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7428 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7429 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
7430 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_12
7431 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
7432 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7433 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_13
7434 ; RV64ZVE32F-NEXT: .LBB64_6: # %else8
7435 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7436 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_14
7437 ; RV64ZVE32F-NEXT: .LBB64_7: # %else11
7438 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7439 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_9
7440 ; RV64ZVE32F-NEXT: .LBB64_8: # %cond.load13
7441 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7442 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
7443 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7444 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7445 ; RV64ZVE32F-NEXT: add a2, a0, a2
7446 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7447 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7448 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7449 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
7450 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
7451 ; RV64ZVE32F-NEXT: .LBB64_9: # %else14
7452 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7453 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7454 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
7455 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_15
7456 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
7457 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7458 ; RV64ZVE32F-NEXT: bnez a1, .LBB64_16
7459 ; RV64ZVE32F-NEXT: .LBB64_11: # %else20
7460 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7461 ; RV64ZVE32F-NEXT: ret
7462 ; RV64ZVE32F-NEXT: .LBB64_12: # %cond.load4
7463 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7464 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7465 ; RV64ZVE32F-NEXT: add a2, a0, a2
7466 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7467 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7468 ; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
7469 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
7470 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
7471 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7472 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_6
7473 ; RV64ZVE32F-NEXT: .LBB64_13: # %cond.load7
7474 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7475 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7476 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7477 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7478 ; RV64ZVE32F-NEXT: add a2, a0, a2
7479 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7480 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7481 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7482 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
7483 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
7484 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7485 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_7
7486 ; RV64ZVE32F-NEXT: .LBB64_14: # %cond.load10
7487 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7488 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7489 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7490 ; RV64ZVE32F-NEXT: add a2, a0, a2
7491 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7492 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
7493 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7494 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
7495 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7496 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_8
7497 ; RV64ZVE32F-NEXT: j .LBB64_9
7498 ; RV64ZVE32F-NEXT: .LBB64_15: # %cond.load16
7499 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7500 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7501 ; RV64ZVE32F-NEXT: add a2, a0, a2
7502 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7503 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7504 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
7505 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
7506 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
7507 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7508 ; RV64ZVE32F-NEXT: beqz a1, .LBB64_11
7509 ; RV64ZVE32F-NEXT: .LBB64_16: # %cond.load19
7510 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7511 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7512 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
7513 ; RV64ZVE32F-NEXT: slli a1, a1, 1
7514 ; RV64ZVE32F-NEXT: add a0, a0, a1
7515 ; RV64ZVE32F-NEXT: flh fa5, 0(a0)
7516 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7517 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7518 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7519 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
7520 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7521 ; RV64ZVE32F-NEXT: ret
7522 %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
7523 %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
7527 define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
7528 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f16:
7530 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
7531 ; RV32-NEXT: vsext.vf4 v10, v8
7532 ; RV32-NEXT: vadd.vv v10, v10, v10
7533 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7534 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
7535 ; RV32-NEXT: vmv.v.v v8, v9
7538 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f16:
7540 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7541 ; RV64V-NEXT: vsext.vf8 v12, v8
7542 ; RV64V-NEXT: vadd.vv v12, v12, v12
7543 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7544 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
7545 ; RV64V-NEXT: vmv.v.v v8, v9
7548 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f16:
7549 ; RV64ZVE32F: # %bb.0:
7550 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7551 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7552 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7553 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_2
7554 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
7555 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7556 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7557 ; RV64ZVE32F-NEXT: add a2, a0, a2
7558 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7559 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
7560 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7561 ; RV64ZVE32F-NEXT: .LBB65_2: # %else
7562 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7563 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_4
7564 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
7565 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7566 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
7567 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7568 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7569 ; RV64ZVE32F-NEXT: add a2, a0, a2
7570 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7571 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7572 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
7573 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
7574 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
7575 ; RV64ZVE32F-NEXT: .LBB65_4: # %else2
7576 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7577 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
7578 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7579 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7580 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
7581 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_12
7582 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
7583 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7584 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_13
7585 ; RV64ZVE32F-NEXT: .LBB65_6: # %else8
7586 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7587 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_14
7588 ; RV64ZVE32F-NEXT: .LBB65_7: # %else11
7589 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7590 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_9
7591 ; RV64ZVE32F-NEXT: .LBB65_8: # %cond.load13
7592 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7593 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
7594 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7595 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7596 ; RV64ZVE32F-NEXT: add a2, a0, a2
7597 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7598 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7599 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7600 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
7601 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
7602 ; RV64ZVE32F-NEXT: .LBB65_9: # %else14
7603 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7604 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7605 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
7606 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_15
7607 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
7608 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7609 ; RV64ZVE32F-NEXT: bnez a1, .LBB65_16
7610 ; RV64ZVE32F-NEXT: .LBB65_11: # %else20
7611 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7612 ; RV64ZVE32F-NEXT: ret
7613 ; RV64ZVE32F-NEXT: .LBB65_12: # %cond.load4
7614 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7615 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7616 ; RV64ZVE32F-NEXT: add a2, a0, a2
7617 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7618 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7619 ; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
7620 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
7621 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
7622 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7623 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_6
7624 ; RV64ZVE32F-NEXT: .LBB65_13: # %cond.load7
7625 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7626 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7627 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7628 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7629 ; RV64ZVE32F-NEXT: add a2, a0, a2
7630 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7631 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7632 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7633 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
7634 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
7635 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7636 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_7
7637 ; RV64ZVE32F-NEXT: .LBB65_14: # %cond.load10
7638 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7639 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7640 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7641 ; RV64ZVE32F-NEXT: add a2, a0, a2
7642 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7643 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
7644 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7645 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
7646 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7647 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_8
7648 ; RV64ZVE32F-NEXT: j .LBB65_9
7649 ; RV64ZVE32F-NEXT: .LBB65_15: # %cond.load16
7650 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7651 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7652 ; RV64ZVE32F-NEXT: add a2, a0, a2
7653 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7654 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7655 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
7656 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
7657 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
7658 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7659 ; RV64ZVE32F-NEXT: beqz a1, .LBB65_11
7660 ; RV64ZVE32F-NEXT: .LBB65_16: # %cond.load19
7661 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7662 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7663 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
7664 ; RV64ZVE32F-NEXT: slli a1, a1, 1
7665 ; RV64ZVE32F-NEXT: add a0, a0, a1
7666 ; RV64ZVE32F-NEXT: flh fa5, 0(a0)
7667 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7668 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7669 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7670 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
7671 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7672 ; RV64ZVE32F-NEXT: ret
7673 %eidxs = sext <8 x i8> %idxs to <8 x i16>
7674 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
7675 %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
7679 define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
7680 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16:
7682 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
7683 ; RV32-NEXT: vwaddu.vv v10, v8, v8
7684 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7685 ; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
7686 ; RV32-NEXT: vmv.v.v v8, v9
7689 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16:
7691 ; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
7692 ; RV64V-NEXT: vwaddu.vv v10, v8, v8
7693 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7694 ; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t
7695 ; RV64V-NEXT: vmv.v.v v8, v9
7698 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f16:
7699 ; RV64ZVE32F: # %bb.0:
7700 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7701 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7702 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7703 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_2
7704 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
7705 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7706 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7707 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7708 ; RV64ZVE32F-NEXT: add a2, a0, a2
7709 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7710 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
7711 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7712 ; RV64ZVE32F-NEXT: .LBB66_2: # %else
7713 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7714 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_4
7715 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
7716 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7717 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
7718 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7719 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7720 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7721 ; RV64ZVE32F-NEXT: add a2, a0, a2
7722 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7723 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7724 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
7725 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
7726 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
7727 ; RV64ZVE32F-NEXT: .LBB66_4: # %else2
7728 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
7729 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
7730 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7731 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7732 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
7733 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_12
7734 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
7735 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7736 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_13
7737 ; RV64ZVE32F-NEXT: .LBB66_6: # %else8
7738 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7739 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_14
7740 ; RV64ZVE32F-NEXT: .LBB66_7: # %else11
7741 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7742 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_9
7743 ; RV64ZVE32F-NEXT: .LBB66_8: # %cond.load13
7744 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7745 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
7746 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7747 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7748 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7749 ; RV64ZVE32F-NEXT: add a2, a0, a2
7750 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7751 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7752 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7753 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
7754 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
7755 ; RV64ZVE32F-NEXT: .LBB66_9: # %else14
7756 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
7757 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7758 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
7759 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_15
7760 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
7761 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7762 ; RV64ZVE32F-NEXT: bnez a1, .LBB66_16
7763 ; RV64ZVE32F-NEXT: .LBB66_11: # %else20
7764 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7765 ; RV64ZVE32F-NEXT: ret
7766 ; RV64ZVE32F-NEXT: .LBB66_12: # %cond.load4
7767 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7768 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7769 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7770 ; RV64ZVE32F-NEXT: add a2, a0, a2
7771 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7772 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7773 ; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
7774 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
7775 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
7776 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7777 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_6
7778 ; RV64ZVE32F-NEXT: .LBB66_13: # %cond.load7
7779 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7780 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7781 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7782 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7783 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7784 ; RV64ZVE32F-NEXT: add a2, a0, a2
7785 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7786 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7787 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7788 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
7789 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
7790 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7791 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_7
7792 ; RV64ZVE32F-NEXT: .LBB66_14: # %cond.load10
7793 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7794 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7795 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7796 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7797 ; RV64ZVE32F-NEXT: add a2, a0, a2
7798 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7799 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
7800 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7801 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
7802 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7803 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_8
7804 ; RV64ZVE32F-NEXT: j .LBB66_9
7805 ; RV64ZVE32F-NEXT: .LBB66_15: # %cond.load16
7806 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7807 ; RV64ZVE32F-NEXT: andi a2, a2, 255
7808 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7809 ; RV64ZVE32F-NEXT: add a2, a0, a2
7810 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7811 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7812 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
7813 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
7814 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
7815 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7816 ; RV64ZVE32F-NEXT: beqz a1, .LBB66_11
7817 ; RV64ZVE32F-NEXT: .LBB66_16: # %cond.load19
7818 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7819 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7820 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
7821 ; RV64ZVE32F-NEXT: andi a1, a1, 255
7822 ; RV64ZVE32F-NEXT: slli a1, a1, 1
7823 ; RV64ZVE32F-NEXT: add a0, a0, a1
7824 ; RV64ZVE32F-NEXT: flh fa5, 0(a0)
7825 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
7826 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7827 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7828 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
7829 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7830 ; RV64ZVE32F-NEXT: ret
7831 %eidxs = zext <8 x i8> %idxs to <8 x i16>
7832 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
7833 %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
7837 define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x half> %passthru) {
7838 ; RV32-LABEL: mgather_baseidx_v8f16:
7840 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
7841 ; RV32-NEXT: vwadd.vv v10, v8, v8
7842 ; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
7843 ; RV32-NEXT: vmv.v.v v8, v9
7846 ; RV64V-LABEL: mgather_baseidx_v8f16:
7848 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
7849 ; RV64V-NEXT: vsext.vf4 v12, v8
7850 ; RV64V-NEXT: vadd.vv v12, v12, v12
7851 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
7852 ; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
7853 ; RV64V-NEXT: vmv.v.v v8, v9
7856 ; RV64ZVE32F-LABEL: mgather_baseidx_v8f16:
7857 ; RV64ZVE32F: # %bb.0:
7858 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
7859 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
7860 ; RV64ZVE32F-NEXT: andi a2, a1, 1
7861 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_2
7862 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
7863 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, mf2, tu, ma
7864 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7865 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7866 ; RV64ZVE32F-NEXT: add a2, a0, a2
7867 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7868 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
7869 ; RV64ZVE32F-NEXT: .LBB67_2: # %else
7870 ; RV64ZVE32F-NEXT: andi a2, a1, 2
7871 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_4
7872 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
7873 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7874 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
7875 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7876 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7877 ; RV64ZVE32F-NEXT: add a2, a0, a2
7878 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7879 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
7880 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
7881 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
7882 ; RV64ZVE32F-NEXT: .LBB67_4: # %else2
7883 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
7884 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
7885 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7886 ; RV64ZVE32F-NEXT: andi a2, a1, 4
7887 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
7888 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_12
7889 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
7890 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7891 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_13
7892 ; RV64ZVE32F-NEXT: .LBB67_6: # %else8
7893 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7894 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_14
7895 ; RV64ZVE32F-NEXT: .LBB67_7: # %else11
7896 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7897 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_9
7898 ; RV64ZVE32F-NEXT: .LBB67_8: # %cond.load13
7899 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7900 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
7901 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7902 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7903 ; RV64ZVE32F-NEXT: add a2, a0, a2
7904 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7905 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7906 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
7907 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
7908 ; RV64ZVE32F-NEXT: .LBB67_9: # %else14
7909 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
7910 ; RV64ZVE32F-NEXT: andi a2, a1, 64
7911 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
7912 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_15
7913 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
7914 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7915 ; RV64ZVE32F-NEXT: bnez a1, .LBB67_16
7916 ; RV64ZVE32F-NEXT: .LBB67_11: # %else20
7917 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7918 ; RV64ZVE32F-NEXT: ret
7919 ; RV64ZVE32F-NEXT: .LBB67_12: # %cond.load4
7920 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7921 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7922 ; RV64ZVE32F-NEXT: add a2, a0, a2
7923 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7924 ; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
7925 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
7926 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
7927 ; RV64ZVE32F-NEXT: andi a2, a1, 8
7928 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_6
7929 ; RV64ZVE32F-NEXT: .LBB67_13: # %cond.load7
7930 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7931 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7932 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7933 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7934 ; RV64ZVE32F-NEXT: add a2, a0, a2
7935 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7936 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7937 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
7938 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
7939 ; RV64ZVE32F-NEXT: andi a2, a1, 16
7940 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_7
7941 ; RV64ZVE32F-NEXT: .LBB67_14: # %cond.load10
7942 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
7943 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
7944 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7945 ; RV64ZVE32F-NEXT: add a2, a0, a2
7946 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7947 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7948 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
7949 ; RV64ZVE32F-NEXT: andi a2, a1, 32
7950 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_8
7951 ; RV64ZVE32F-NEXT: j .LBB67_9
7952 ; RV64ZVE32F-NEXT: .LBB67_15: # %cond.load16
7953 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
7954 ; RV64ZVE32F-NEXT: slli a2, a2, 1
7955 ; RV64ZVE32F-NEXT: add a2, a0, a2
7956 ; RV64ZVE32F-NEXT: flh fa5, 0(a2)
7957 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
7958 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
7959 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
7960 ; RV64ZVE32F-NEXT: andi a1, a1, -128
7961 ; RV64ZVE32F-NEXT: beqz a1, .LBB67_11
7962 ; RV64ZVE32F-NEXT: .LBB67_16: # %cond.load19
7963 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
7964 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
7965 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
7966 ; RV64ZVE32F-NEXT: slli a1, a1, 1
7967 ; RV64ZVE32F-NEXT: add a0, a0, a1
7968 ; RV64ZVE32F-NEXT: flh fa5, 0(a0)
7969 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
7970 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
7971 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
7972 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
7973 ; RV64ZVE32F-NEXT: ret
7974 %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
7975 %v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
7979 declare <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x float>)
7981 define <1 x float> @mgather_v1f32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x float> %passthru) {
7982 ; RV32V-LABEL: mgather_v1f32:
7984 ; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
7985 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
7986 ; RV32V-NEXT: vmv1r.v v8, v9
7989 ; RV64V-LABEL: mgather_v1f32:
7991 ; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
7992 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
7993 ; RV64V-NEXT: vmv1r.v v8, v9
7996 ; RV32ZVE32F-LABEL: mgather_v1f32:
7997 ; RV32ZVE32F: # %bb.0:
7998 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
7999 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
8000 ; RV32ZVE32F-NEXT: vmv.v.v v8, v9
8001 ; RV32ZVE32F-NEXT: ret
8003 ; RV64ZVE32F-LABEL: mgather_v1f32:
8004 ; RV64ZVE32F: # %bb.0:
8005 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
8006 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
8007 ; RV64ZVE32F-NEXT: bnez a1, .LBB68_2
8008 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
8009 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
8010 ; RV64ZVE32F-NEXT: vle32.v v8, (a0)
8011 ; RV64ZVE32F-NEXT: .LBB68_2: # %else
8012 ; RV64ZVE32F-NEXT: ret
8013 %v = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru)
8017 declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>)
8019 define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %passthru) {
8020 ; RV32V-LABEL: mgather_v2f32:
8022 ; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
8023 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
8024 ; RV32V-NEXT: vmv1r.v v8, v9
8027 ; RV64V-LABEL: mgather_v2f32:
8029 ; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
8030 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
8031 ; RV64V-NEXT: vmv1r.v v8, v9
8034 ; RV32ZVE32F-LABEL: mgather_v2f32:
8035 ; RV32ZVE32F: # %bb.0:
8036 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
8037 ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
8038 ; RV32ZVE32F-NEXT: vmv.v.v v8, v9
8039 ; RV32ZVE32F-NEXT: ret
8041 ; RV64ZVE32F-LABEL: mgather_v2f32:
8042 ; RV64ZVE32F: # %bb.0:
8043 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8044 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
8045 ; RV64ZVE32F-NEXT: andi a3, a2, 1
8046 ; RV64ZVE32F-NEXT: bnez a3, .LBB69_3
8047 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8048 ; RV64ZVE32F-NEXT: andi a2, a2, 2
8049 ; RV64ZVE32F-NEXT: bnez a2, .LBB69_4
8050 ; RV64ZVE32F-NEXT: .LBB69_2: # %else2
8051 ; RV64ZVE32F-NEXT: ret
8052 ; RV64ZVE32F-NEXT: .LBB69_3: # %cond.load
8053 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
8054 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
8055 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8056 ; RV64ZVE32F-NEXT: andi a2, a2, 2
8057 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_2
8058 ; RV64ZVE32F-NEXT: .LBB69_4: # %cond.load1
8059 ; RV64ZVE32F-NEXT: flw fa5, 0(a1)
8060 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
8061 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8062 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
8063 ; RV64ZVE32F-NEXT: ret
8064 %v = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x float> %passthru)
8068 declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)
8070 define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %passthru) {
8071 ; RV32-LABEL: mgather_v4f32:
8073 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
8074 ; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
8075 ; RV32-NEXT: vmv.v.v v8, v9
8078 ; RV64V-LABEL: mgather_v4f32:
8080 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, mu
8081 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
8082 ; RV64V-NEXT: vmv.v.v v8, v10
8085 ; RV64ZVE32F-LABEL: mgather_v4f32:
8086 ; RV64ZVE32F: # %bb.0:
8087 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8088 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8089 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8090 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_5
8091 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8092 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8093 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_6
8094 ; RV64ZVE32F-NEXT: .LBB70_2: # %else2
8095 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8096 ; RV64ZVE32F-NEXT: bnez a2, .LBB70_7
8097 ; RV64ZVE32F-NEXT: .LBB70_3: # %else5
8098 ; RV64ZVE32F-NEXT: andi a1, a1, 8
8099 ; RV64ZVE32F-NEXT: bnez a1, .LBB70_8
8100 ; RV64ZVE32F-NEXT: .LBB70_4: # %else8
8101 ; RV64ZVE32F-NEXT: ret
8102 ; RV64ZVE32F-NEXT: .LBB70_5: # %cond.load
8103 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
8104 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8105 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
8106 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8107 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8108 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_2
8109 ; RV64ZVE32F-NEXT: .LBB70_6: # %cond.load1
8110 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
8111 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8112 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
8113 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8114 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
8115 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8116 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_3
8117 ; RV64ZVE32F-NEXT: .LBB70_7: # %cond.load4
8118 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
8119 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8120 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
8121 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8122 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
8123 ; RV64ZVE32F-NEXT: andi a1, a1, 8
8124 ; RV64ZVE32F-NEXT: beqz a1, .LBB70_4
8125 ; RV64ZVE32F-NEXT: .LBB70_8: # %cond.load7
8126 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
8127 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
8128 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
8129 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8130 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
8131 ; RV64ZVE32F-NEXT: ret
8132 %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x float> %passthru)
8136 define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) {
8137 ; RV32-LABEL: mgather_truemask_v4f32:
8139 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
8140 ; RV32-NEXT: vluxei32.v v8, (zero), v8
8143 ; RV64V-LABEL: mgather_truemask_v4f32:
8145 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
8146 ; RV64V-NEXT: vluxei64.v v10, (zero), v8
8147 ; RV64V-NEXT: vmv.v.v v8, v10
8150 ; RV64ZVE32F-LABEL: mgather_truemask_v4f32:
8151 ; RV64ZVE32F: # %bb.0:
8152 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
8153 ; RV64ZVE32F-NEXT: vmset.m v9
8154 ; RV64ZVE32F-NEXT: vmv.x.s a1, v9
8155 ; RV64ZVE32F-NEXT: beqz zero, .LBB71_5
8156 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8157 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8158 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_6
8159 ; RV64ZVE32F-NEXT: .LBB71_2: # %else2
8160 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8161 ; RV64ZVE32F-NEXT: bnez a2, .LBB71_7
8162 ; RV64ZVE32F-NEXT: .LBB71_3: # %else5
8163 ; RV64ZVE32F-NEXT: andi a1, a1, 8
8164 ; RV64ZVE32F-NEXT: bnez a1, .LBB71_8
8165 ; RV64ZVE32F-NEXT: .LBB71_4: # %else8
8166 ; RV64ZVE32F-NEXT: ret
8167 ; RV64ZVE32F-NEXT: .LBB71_5: # %cond.load
8168 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
8169 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8170 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma
8171 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8172 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8173 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_2
8174 ; RV64ZVE32F-NEXT: .LBB71_6: # %cond.load1
8175 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
8176 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8177 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
8178 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8179 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
8180 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
8181 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8182 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_3
8183 ; RV64ZVE32F-NEXT: .LBB71_7: # %cond.load4
8184 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
8185 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8186 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
8187 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8188 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
8189 ; RV64ZVE32F-NEXT: andi a1, a1, 8
8190 ; RV64ZVE32F-NEXT: beqz a1, .LBB71_4
8191 ; RV64ZVE32F-NEXT: .LBB71_8: # %cond.load7
8192 ; RV64ZVE32F-NEXT: ld a0, 24(a0)
8193 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
8194 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
8195 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8196 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
8197 ; RV64ZVE32F-NEXT: ret
8198 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
8199 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
8200 %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue, <4 x float> %passthru)
8204 define <4 x float> @mgather_falsemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) {
8205 ; RV32-LABEL: mgather_falsemask_v4f32:
8207 ; RV32-NEXT: vmv1r.v v8, v9
8210 ; RV64V-LABEL: mgather_falsemask_v4f32:
8212 ; RV64V-NEXT: vmv1r.v v8, v10
8215 ; RV64ZVE32F-LABEL: mgather_falsemask_v4f32:
8216 ; RV64ZVE32F: # %bb.0:
8217 ; RV64ZVE32F-NEXT: ret
8218 %v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x float> %passthru)
8222 declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)
8224 define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %passthru) {
8225 ; RV32-LABEL: mgather_v8f32:
8227 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
8228 ; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
8229 ; RV32-NEXT: vmv.v.v v8, v10
8232 ; RV64V-LABEL: mgather_v8f32:
8234 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
8235 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
8236 ; RV64V-NEXT: vmv.v.v v8, v12
8239 ; RV64ZVE32F-LABEL: mgather_v8f32:
8240 ; RV64ZVE32F: # %bb.0:
8241 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8242 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8243 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8244 ; RV64ZVE32F-NEXT: bnez a2, .LBB73_9
8245 ; RV64ZVE32F-NEXT: # %bb.1: # %else
8246 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8247 ; RV64ZVE32F-NEXT: bnez a2, .LBB73_10
8248 ; RV64ZVE32F-NEXT: .LBB73_2: # %else2
8249 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8250 ; RV64ZVE32F-NEXT: bnez a2, .LBB73_11
8251 ; RV64ZVE32F-NEXT: .LBB73_3: # %else5
8252 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8253 ; RV64ZVE32F-NEXT: bnez a2, .LBB73_12
8254 ; RV64ZVE32F-NEXT: .LBB73_4: # %else8
8255 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8256 ; RV64ZVE32F-NEXT: bnez a2, .LBB73_13
8257 ; RV64ZVE32F-NEXT: .LBB73_5: # %else11
8258 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8259 ; RV64ZVE32F-NEXT: bnez a2, .LBB73_14
8260 ; RV64ZVE32F-NEXT: .LBB73_6: # %else14
8261 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8262 ; RV64ZVE32F-NEXT: bnez a2, .LBB73_15
8263 ; RV64ZVE32F-NEXT: .LBB73_7: # %else17
8264 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8265 ; RV64ZVE32F-NEXT: bnez a1, .LBB73_16
8266 ; RV64ZVE32F-NEXT: .LBB73_8: # %else20
8267 ; RV64ZVE32F-NEXT: ret
8268 ; RV64ZVE32F-NEXT: .LBB73_9: # %cond.load
8269 ; RV64ZVE32F-NEXT: ld a2, 0(a0)
8270 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8271 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
8272 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8273 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8274 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_2
8275 ; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1
8276 ; RV64ZVE32F-NEXT: ld a2, 8(a0)
8277 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8278 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
8279 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8280 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
8281 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8282 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_3
8283 ; RV64ZVE32F-NEXT: .LBB73_11: # %cond.load4
8284 ; RV64ZVE32F-NEXT: ld a2, 16(a0)
8285 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8286 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
8287 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8288 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
8289 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8290 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_4
8291 ; RV64ZVE32F-NEXT: .LBB73_12: # %cond.load7
8292 ; RV64ZVE32F-NEXT: ld a2, 24(a0)
8293 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8294 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
8295 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8296 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
8297 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8298 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_5
8299 ; RV64ZVE32F-NEXT: .LBB73_13: # %cond.load10
8300 ; RV64ZVE32F-NEXT: ld a2, 32(a0)
8301 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8302 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
8303 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8304 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
8305 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8306 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_6
8307 ; RV64ZVE32F-NEXT: .LBB73_14: # %cond.load13
8308 ; RV64ZVE32F-NEXT: ld a2, 40(a0)
8309 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8310 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
8311 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8312 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
8313 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8314 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_7
8315 ; RV64ZVE32F-NEXT: .LBB73_15: # %cond.load16
8316 ; RV64ZVE32F-NEXT: ld a2, 48(a0)
8317 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8318 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
8319 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8320 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
8321 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8322 ; RV64ZVE32F-NEXT: beqz a1, .LBB73_8
8323 ; RV64ZVE32F-NEXT: .LBB73_16: # %cond.load19
8324 ; RV64ZVE32F-NEXT: ld a0, 56(a0)
8325 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
8326 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8327 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8328 ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
8329 ; RV64ZVE32F-NEXT: ret
8330 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
8334 define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
8335 ; RV32-LABEL: mgather_baseidx_v8i8_v8f32:
8337 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
8338 ; RV32-NEXT: vsext.vf4 v12, v8
8339 ; RV32-NEXT: vsll.vi v8, v12, 2
8340 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
8341 ; RV32-NEXT: vmv.v.v v8, v10
8344 ; RV64V-LABEL: mgather_baseidx_v8i8_v8f32:
8346 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8347 ; RV64V-NEXT: vsext.vf8 v12, v8
8348 ; RV64V-NEXT: vsll.vi v12, v12, 2
8349 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
8350 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
8351 ; RV64V-NEXT: vmv.v.v v8, v10
8354 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f32:
8355 ; RV64ZVE32F: # %bb.0:
8356 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8357 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8358 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8359 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_2
8360 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
8361 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8362 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8363 ; RV64ZVE32F-NEXT: add a2, a0, a2
8364 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8365 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
8366 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8367 ; RV64ZVE32F-NEXT: .LBB74_2: # %else
8368 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8369 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_4
8370 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
8371 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8372 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8373 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8374 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8375 ; RV64ZVE32F-NEXT: add a2, a0, a2
8376 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8377 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8378 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8379 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
8380 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
8381 ; RV64ZVE32F-NEXT: .LBB74_4: # %else2
8382 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8383 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8384 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8385 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8386 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8387 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_12
8388 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
8389 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8390 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_13
8391 ; RV64ZVE32F-NEXT: .LBB74_6: # %else8
8392 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8393 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_14
8394 ; RV64ZVE32F-NEXT: .LBB74_7: # %else11
8395 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8396 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_9
8397 ; RV64ZVE32F-NEXT: .LBB74_8: # %cond.load13
8398 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8399 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8400 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8401 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8402 ; RV64ZVE32F-NEXT: add a2, a0, a2
8403 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8404 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8405 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8406 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
8407 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
8408 ; RV64ZVE32F-NEXT: .LBB74_9: # %else14
8409 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8410 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8411 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8412 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_15
8413 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
8414 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8415 ; RV64ZVE32F-NEXT: bnez a1, .LBB74_16
8416 ; RV64ZVE32F-NEXT: .LBB74_11: # %else20
8417 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
8418 ; RV64ZVE32F-NEXT: ret
8419 ; RV64ZVE32F-NEXT: .LBB74_12: # %cond.load4
8420 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8421 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8422 ; RV64ZVE32F-NEXT: add a2, a0, a2
8423 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8424 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8425 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
8426 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
8427 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
8428 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8429 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_6
8430 ; RV64ZVE32F-NEXT: .LBB74_13: # %cond.load7
8431 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8432 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8433 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8434 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8435 ; RV64ZVE32F-NEXT: add a2, a0, a2
8436 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8437 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8438 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8439 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
8440 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
8441 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8442 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
8443 ; RV64ZVE32F-NEXT: .LBB74_14: # %cond.load10
8444 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8445 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8446 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8447 ; RV64ZVE32F-NEXT: add a2, a0, a2
8448 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8449 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
8450 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8451 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
8452 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8453 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_8
8454 ; RV64ZVE32F-NEXT: j .LBB74_9
8455 ; RV64ZVE32F-NEXT: .LBB74_15: # %cond.load16
8456 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8457 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8458 ; RV64ZVE32F-NEXT: add a2, a0, a2
8459 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8460 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8461 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
8462 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
8463 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
8464 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8465 ; RV64ZVE32F-NEXT: beqz a1, .LBB74_11
8466 ; RV64ZVE32F-NEXT: .LBB74_16: # %cond.load19
8467 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8468 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8469 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8470 ; RV64ZVE32F-NEXT: slli a1, a1, 2
8471 ; RV64ZVE32F-NEXT: add a0, a0, a1
8472 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
8473 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8474 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8475 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8476 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
8477 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
8478 ; RV64ZVE32F-NEXT: ret
8479 %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
8480 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
8481 ret <8 x float> %v
8482 }
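; Same gather as above, but with the i8 indices explicitly sign-extended to i32 before the getelementptr.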
8484 define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
8485 ; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f32:
8487 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
8488 ; RV32-NEXT: vsext.vf4 v12, v8
8489 ; RV32-NEXT: vsll.vi v8, v12, 2
8490 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
8491 ; RV32-NEXT: vmv.v.v v8, v10
8494 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f32:
8496 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8497 ; RV64V-NEXT: vsext.vf8 v12, v8
8498 ; RV64V-NEXT: vsll.vi v12, v12, 2
8499 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
8500 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
8501 ; RV64V-NEXT: vmv.v.v v8, v10
8504 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f32:
8505 ; RV64ZVE32F: # %bb.0:
8506 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8507 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8508 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8509 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_2
8510 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
8511 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8512 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8513 ; RV64ZVE32F-NEXT: add a2, a0, a2
8514 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8515 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
8516 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8517 ; RV64ZVE32F-NEXT: .LBB75_2: # %else
8518 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8519 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_4
8520 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
8521 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8522 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8523 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8524 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8525 ; RV64ZVE32F-NEXT: add a2, a0, a2
8526 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8527 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8528 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8529 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
8530 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
8531 ; RV64ZVE32F-NEXT: .LBB75_4: # %else2
8532 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8533 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8534 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8535 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8536 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8537 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_12
8538 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
8539 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8540 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_13
8541 ; RV64ZVE32F-NEXT: .LBB75_6: # %else8
8542 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8543 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_14
8544 ; RV64ZVE32F-NEXT: .LBB75_7: # %else11
8545 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8546 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_9
8547 ; RV64ZVE32F-NEXT: .LBB75_8: # %cond.load13
8548 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8549 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8550 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8551 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8552 ; RV64ZVE32F-NEXT: add a2, a0, a2
8553 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8554 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8555 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8556 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
8557 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
8558 ; RV64ZVE32F-NEXT: .LBB75_9: # %else14
8559 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8560 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8561 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8562 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_15
8563 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
8564 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8565 ; RV64ZVE32F-NEXT: bnez a1, .LBB75_16
8566 ; RV64ZVE32F-NEXT: .LBB75_11: # %else20
8567 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
8568 ; RV64ZVE32F-NEXT: ret
8569 ; RV64ZVE32F-NEXT: .LBB75_12: # %cond.load4
8570 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8571 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8572 ; RV64ZVE32F-NEXT: add a2, a0, a2
8573 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8574 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8575 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
8576 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
8577 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
8578 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8579 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_6
8580 ; RV64ZVE32F-NEXT: .LBB75_13: # %cond.load7
8581 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8582 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8583 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8584 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8585 ; RV64ZVE32F-NEXT: add a2, a0, a2
8586 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8587 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8588 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8589 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
8590 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
8591 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8592 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_7
8593 ; RV64ZVE32F-NEXT: .LBB75_14: # %cond.load10
8594 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8595 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8596 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8597 ; RV64ZVE32F-NEXT: add a2, a0, a2
8598 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8599 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
8600 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8601 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
8602 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8603 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_8
8604 ; RV64ZVE32F-NEXT: j .LBB75_9
8605 ; RV64ZVE32F-NEXT: .LBB75_15: # %cond.load16
8606 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8607 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8608 ; RV64ZVE32F-NEXT: add a2, a0, a2
8609 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8610 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8611 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
8612 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
8613 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
8614 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8615 ; RV64ZVE32F-NEXT: beqz a1, .LBB75_11
8616 ; RV64ZVE32F-NEXT: .LBB75_16: # %cond.load19
8617 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8618 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8619 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8620 ; RV64ZVE32F-NEXT: slli a1, a1, 2
8621 ; RV64ZVE32F-NEXT: add a0, a0, a1
8622 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
8623 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8624 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8625 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8626 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
8627 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
8628 ; RV64ZVE32F-NEXT: ret
8629 %eidxs = sext <8 x i8> %idxs to <8 x i32>
8630 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
8631 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
8632 ret <8 x float> %v
8633 }
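; Same gather with the i8 indices zero-extended to i32; the scalar RV64ZVE32F path clears the upper bits of each extracted index with andi ..., 255.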
8635 define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
8636 ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f32:
8638 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8639 ; RV32-NEXT: vzext.vf2 v9, v8
8640 ; RV32-NEXT: vsll.vi v8, v9, 2
8641 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
8642 ; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t
8643 ; RV32-NEXT: vmv.v.v v8, v10
8646 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f32:
8648 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
8649 ; RV64V-NEXT: vzext.vf2 v9, v8
8650 ; RV64V-NEXT: vsll.vi v8, v9, 2
8651 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
8652 ; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t
8653 ; RV64V-NEXT: vmv.v.v v8, v10
8656 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f32:
8657 ; RV64ZVE32F: # %bb.0:
8658 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8659 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8660 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8661 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_2
8662 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
8663 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8664 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8665 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8666 ; RV64ZVE32F-NEXT: add a2, a0, a2
8667 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8668 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
8669 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8670 ; RV64ZVE32F-NEXT: .LBB76_2: # %else
8671 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8672 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_4
8673 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
8674 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8675 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8676 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8677 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8678 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8679 ; RV64ZVE32F-NEXT: add a2, a0, a2
8680 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8681 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8682 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8683 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
8684 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
8685 ; RV64ZVE32F-NEXT: .LBB76_4: # %else2
8686 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
8687 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8688 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8689 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8690 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8691 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_12
8692 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
8693 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8694 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_13
8695 ; RV64ZVE32F-NEXT: .LBB76_6: # %else8
8696 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8697 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_14
8698 ; RV64ZVE32F-NEXT: .LBB76_7: # %else11
8699 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8700 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_9
8701 ; RV64ZVE32F-NEXT: .LBB76_8: # %cond.load13
8702 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8703 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8704 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8705 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8706 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8707 ; RV64ZVE32F-NEXT: add a2, a0, a2
8708 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8709 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8710 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8711 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
8712 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
8713 ; RV64ZVE32F-NEXT: .LBB76_9: # %else14
8714 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
8715 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8716 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8717 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_15
8718 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
8719 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8720 ; RV64ZVE32F-NEXT: bnez a1, .LBB76_16
8721 ; RV64ZVE32F-NEXT: .LBB76_11: # %else20
8722 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
8723 ; RV64ZVE32F-NEXT: ret
8724 ; RV64ZVE32F-NEXT: .LBB76_12: # %cond.load4
8725 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8726 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8727 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8728 ; RV64ZVE32F-NEXT: add a2, a0, a2
8729 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8730 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8731 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
8732 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
8733 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
8734 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8735 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_6
8736 ; RV64ZVE32F-NEXT: .LBB76_13: # %cond.load7
8737 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8738 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8739 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8740 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8741 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8742 ; RV64ZVE32F-NEXT: add a2, a0, a2
8743 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8744 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8745 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8746 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
8747 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
8748 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8749 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_7
8750 ; RV64ZVE32F-NEXT: .LBB76_14: # %cond.load10
8751 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8752 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8753 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8754 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8755 ; RV64ZVE32F-NEXT: add a2, a0, a2
8756 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8757 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
8758 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8759 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
8760 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8761 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_8
8762 ; RV64ZVE32F-NEXT: j .LBB76_9
8763 ; RV64ZVE32F-NEXT: .LBB76_15: # %cond.load16
8764 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8765 ; RV64ZVE32F-NEXT: andi a2, a2, 255
8766 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8767 ; RV64ZVE32F-NEXT: add a2, a0, a2
8768 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8769 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8770 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
8771 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
8772 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
8773 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8774 ; RV64ZVE32F-NEXT: beqz a1, .LBB76_11
8775 ; RV64ZVE32F-NEXT: .LBB76_16: # %cond.load19
8776 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8777 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8778 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8779 ; RV64ZVE32F-NEXT: andi a1, a1, 255
8780 ; RV64ZVE32F-NEXT: slli a1, a1, 2
8781 ; RV64ZVE32F-NEXT: add a0, a0, a1
8782 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
8783 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8784 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8785 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8786 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
8787 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
8788 ; RV64ZVE32F-NEXT: ret
8789 %eidxs = zext <8 x i8> %idxs to <8 x i32>
8790 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
8791 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
8792 ret <8 x float> %v
8793 }
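; Masked gather of <8 x float> addressed by raw <8 x i16> indices.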
8795 define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
8796 ; RV32-LABEL: mgather_baseidx_v8i16_v8f32:
8798 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
8799 ; RV32-NEXT: vsext.vf2 v12, v8
8800 ; RV32-NEXT: vsll.vi v8, v12, 2
8801 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
8802 ; RV32-NEXT: vmv.v.v v8, v10
8805 ; RV64V-LABEL: mgather_baseidx_v8i16_v8f32:
8807 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8808 ; RV64V-NEXT: vsext.vf4 v12, v8
8809 ; RV64V-NEXT: vsll.vi v12, v12, 2
8810 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
8811 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
8812 ; RV64V-NEXT: vmv.v.v v8, v10
8815 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f32:
8816 ; RV64ZVE32F: # %bb.0:
8817 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8818 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8819 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8820 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_2
8821 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
8822 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8823 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8824 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8825 ; RV64ZVE32F-NEXT: add a2, a0, a2
8826 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8827 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
8828 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8829 ; RV64ZVE32F-NEXT: .LBB77_2: # %else
8830 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8831 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_4
8832 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
8833 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
8834 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8835 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8836 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8837 ; RV64ZVE32F-NEXT: add a2, a0, a2
8838 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8839 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8840 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8841 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
8842 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
8843 ; RV64ZVE32F-NEXT: .LBB77_4: # %else2
8844 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
8845 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8846 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
8847 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8848 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
8849 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_12
8850 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
8851 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8852 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_13
8853 ; RV64ZVE32F-NEXT: .LBB77_6: # %else8
8854 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8855 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_14
8856 ; RV64ZVE32F-NEXT: .LBB77_7: # %else11
8857 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8858 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_9
8859 ; RV64ZVE32F-NEXT: .LBB77_8: # %cond.load13
8860 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
8861 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
8862 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8863 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8864 ; RV64ZVE32F-NEXT: add a2, a0, a2
8865 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8866 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8867 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8868 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
8869 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
8870 ; RV64ZVE32F-NEXT: .LBB77_9: # %else14
8871 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
8872 ; RV64ZVE32F-NEXT: andi a2, a1, 64
8873 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
8874 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_15
8875 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
8876 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8877 ; RV64ZVE32F-NEXT: bnez a1, .LBB77_16
8878 ; RV64ZVE32F-NEXT: .LBB77_11: # %else20
8879 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
8880 ; RV64ZVE32F-NEXT: ret
8881 ; RV64ZVE32F-NEXT: .LBB77_12: # %cond.load4
8882 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8883 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8884 ; RV64ZVE32F-NEXT: add a2, a0, a2
8885 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8886 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8887 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
8888 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
8889 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
8890 ; RV64ZVE32F-NEXT: andi a2, a1, 8
8891 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_6
8892 ; RV64ZVE32F-NEXT: .LBB77_13: # %cond.load7
8893 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
8894 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8895 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8896 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8897 ; RV64ZVE32F-NEXT: add a2, a0, a2
8898 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8899 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8900 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8901 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
8902 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
8903 ; RV64ZVE32F-NEXT: andi a2, a1, 16
8904 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_7
8905 ; RV64ZVE32F-NEXT: .LBB77_14: # %cond.load10
8906 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
8907 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8908 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8909 ; RV64ZVE32F-NEXT: add a2, a0, a2
8910 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8911 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
8912 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8913 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
8914 ; RV64ZVE32F-NEXT: andi a2, a1, 32
8915 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_8
8916 ; RV64ZVE32F-NEXT: j .LBB77_9
8917 ; RV64ZVE32F-NEXT: .LBB77_15: # %cond.load16
8918 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8919 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8920 ; RV64ZVE32F-NEXT: add a2, a0, a2
8921 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8922 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8923 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
8924 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
8925 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
8926 ; RV64ZVE32F-NEXT: andi a1, a1, -128
8927 ; RV64ZVE32F-NEXT: beqz a1, .LBB77_11
8928 ; RV64ZVE32F-NEXT: .LBB77_16: # %cond.load19
8929 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
8930 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
8931 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
8932 ; RV64ZVE32F-NEXT: slli a1, a1, 2
8933 ; RV64ZVE32F-NEXT: add a0, a0, a1
8934 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
8935 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8936 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
8937 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
8938 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
8939 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
8940 ; RV64ZVE32F-NEXT: ret
8941 %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
8942 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
8943 ret <8 x float> %v
8944 }
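; Same gather with the i16 indices sign-extended to i32 before the getelementptr.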
8946 define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
8947 ; RV32-LABEL: mgather_baseidx_sext_v8i16_v8f32:
8949 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
8950 ; RV32-NEXT: vsext.vf2 v12, v8
8951 ; RV32-NEXT: vsll.vi v8, v12, 2
8952 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
8953 ; RV32-NEXT: vmv.v.v v8, v10
8956 ; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f32:
8958 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
8959 ; RV64V-NEXT: vsext.vf4 v12, v8
8960 ; RV64V-NEXT: vsll.vi v12, v12, 2
8961 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
8962 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
8963 ; RV64V-NEXT: vmv.v.v v8, v10
8966 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f32:
8967 ; RV64ZVE32F: # %bb.0:
8968 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
8969 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
8970 ; RV64ZVE32F-NEXT: andi a2, a1, 1
8971 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_2
8972 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
8973 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
8974 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
8975 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8976 ; RV64ZVE32F-NEXT: add a2, a0, a2
8977 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8978 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
8979 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
8980 ; RV64ZVE32F-NEXT: .LBB78_2: # %else
8981 ; RV64ZVE32F-NEXT: andi a2, a1, 2
8982 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_4
8983 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
8984 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
8985 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
8986 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
8987 ; RV64ZVE32F-NEXT: slli a2, a2, 2
8988 ; RV64ZVE32F-NEXT: add a2, a0, a2
8989 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
8990 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
8991 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
8992 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
8993 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
8994 ; RV64ZVE32F-NEXT: .LBB78_4: # %else2
8995 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
8996 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
8997 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
8998 ; RV64ZVE32F-NEXT: andi a2, a1, 4
8999 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9000 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_12
9001 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
9002 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9003 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_13
9004 ; RV64ZVE32F-NEXT: .LBB78_6: # %else8
9005 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9006 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_14
9007 ; RV64ZVE32F-NEXT: .LBB78_7: # %else11
9008 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9009 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_9
9010 ; RV64ZVE32F-NEXT: .LBB78_8: # %cond.load13
9011 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9012 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9013 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9014 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9015 ; RV64ZVE32F-NEXT: add a2, a0, a2
9016 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9017 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9018 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9019 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
9020 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
9021 ; RV64ZVE32F-NEXT: .LBB78_9: # %else14
9022 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9023 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9024 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9025 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_15
9026 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
9027 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9028 ; RV64ZVE32F-NEXT: bnez a1, .LBB78_16
9029 ; RV64ZVE32F-NEXT: .LBB78_11: # %else20
9030 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
9031 ; RV64ZVE32F-NEXT: ret
9032 ; RV64ZVE32F-NEXT: .LBB78_12: # %cond.load4
9033 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9034 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9035 ; RV64ZVE32F-NEXT: add a2, a0, a2
9036 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9037 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9038 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9039 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
9040 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
9041 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9042 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_6
9043 ; RV64ZVE32F-NEXT: .LBB78_13: # %cond.load7
9044 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9045 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9046 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9047 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9048 ; RV64ZVE32F-NEXT: add a2, a0, a2
9049 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9050 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9051 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9052 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
9053 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
9054 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9055 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_7
9056 ; RV64ZVE32F-NEXT: .LBB78_14: # %cond.load10
9057 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9058 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
9059 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9060 ; RV64ZVE32F-NEXT: add a2, a0, a2
9061 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9062 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
9063 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9064 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
9065 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9066 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_8
9067 ; RV64ZVE32F-NEXT: j .LBB78_9
9068 ; RV64ZVE32F-NEXT: .LBB78_15: # %cond.load16
9069 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9070 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9071 ; RV64ZVE32F-NEXT: add a2, a0, a2
9072 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9073 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9074 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9075 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
9076 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
9077 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9078 ; RV64ZVE32F-NEXT: beqz a1, .LBB78_11
9079 ; RV64ZVE32F-NEXT: .LBB78_16: # %cond.load19
9080 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9081 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9082 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9083 ; RV64ZVE32F-NEXT: slli a1, a1, 2
9084 ; RV64ZVE32F-NEXT: add a0, a0, a1
9085 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
9086 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9087 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9088 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9089 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
9090 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
9091 ; RV64ZVE32F-NEXT: ret
9092 %eidxs = sext <8 x i16> %idxs to <8 x i32>
9093 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
9094 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
9095 ret <8 x float> %v
9096 }
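; Same gather with the i16 indices zero-extended to i32; RV64ZVE32F materializes a 0xffff mask (lui + addiw) and ANDs each extracted index with it.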
9098 define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
9099 ; RV32-LABEL: mgather_baseidx_zext_v8i16_v8f32:
9101 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
9102 ; RV32-NEXT: vzext.vf2 v12, v8
9103 ; RV32-NEXT: vsll.vi v8, v12, 2
9104 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
9105 ; RV32-NEXT: vmv.v.v v8, v10
9108 ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f32:
9110 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
9111 ; RV64V-NEXT: vzext.vf2 v12, v8
9112 ; RV64V-NEXT: vsll.vi v8, v12, 2
9113 ; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t
9114 ; RV64V-NEXT: vmv.v.v v8, v10
9117 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f32:
9118 ; RV64ZVE32F: # %bb.0:
9119 ; RV64ZVE32F-NEXT: lui a1, 16
9120 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9121 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
9122 ; RV64ZVE32F-NEXT: andi a3, a2, 1
9123 ; RV64ZVE32F-NEXT: addiw a1, a1, -1
9124 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_2
9125 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
9126 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
9127 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9128 ; RV64ZVE32F-NEXT: and a3, a3, a1
9129 ; RV64ZVE32F-NEXT: slli a3, a3, 2
9130 ; RV64ZVE32F-NEXT: add a3, a0, a3
9131 ; RV64ZVE32F-NEXT: flw fa5, 0(a3)
9132 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
9133 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
9134 ; RV64ZVE32F-NEXT: .LBB79_2: # %else
9135 ; RV64ZVE32F-NEXT: andi a3, a2, 2
9136 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_4
9137 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
9138 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9139 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9140 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
9141 ; RV64ZVE32F-NEXT: and a3, a3, a1
9142 ; RV64ZVE32F-NEXT: slli a3, a3, 2
9143 ; RV64ZVE32F-NEXT: add a3, a0, a3
9144 ; RV64ZVE32F-NEXT: flw fa5, 0(a3)
9145 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9146 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
9147 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
9148 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1
9149 ; RV64ZVE32F-NEXT: .LBB79_4: # %else2
9150 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
9151 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
9152 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9153 ; RV64ZVE32F-NEXT: andi a3, a2, 4
9154 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9155 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_12
9156 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
9157 ; RV64ZVE32F-NEXT: andi a3, a2, 8
9158 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_13
9159 ; RV64ZVE32F-NEXT: .LBB79_6: # %else8
9160 ; RV64ZVE32F-NEXT: andi a3, a2, 16
9161 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_14
9162 ; RV64ZVE32F-NEXT: .LBB79_7: # %else11
9163 ; RV64ZVE32F-NEXT: andi a3, a2, 32
9164 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_9
9165 ; RV64ZVE32F-NEXT: .LBB79_8: # %cond.load13
9166 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9167 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
9168 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9169 ; RV64ZVE32F-NEXT: and a3, a3, a1
9170 ; RV64ZVE32F-NEXT: slli a3, a3, 2
9171 ; RV64ZVE32F-NEXT: add a3, a0, a3
9172 ; RV64ZVE32F-NEXT: flw fa5, 0(a3)
9173 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9174 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9175 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
9176 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
9177 ; RV64ZVE32F-NEXT: .LBB79_9: # %else14
9178 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
9179 ; RV64ZVE32F-NEXT: andi a3, a2, 64
9180 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
9181 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_15
9182 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
9183 ; RV64ZVE32F-NEXT: andi a2, a2, -128
9184 ; RV64ZVE32F-NEXT: bnez a2, .LBB79_16
9185 ; RV64ZVE32F-NEXT: .LBB79_11: # %else20
9186 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
9187 ; RV64ZVE32F-NEXT: ret
9188 ; RV64ZVE32F-NEXT: .LBB79_12: # %cond.load4
9189 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9190 ; RV64ZVE32F-NEXT: and a3, a3, a1
9191 ; RV64ZVE32F-NEXT: slli a3, a3, 2
9192 ; RV64ZVE32F-NEXT: add a3, a0, a3
9193 ; RV64ZVE32F-NEXT: flw fa5, 0(a3)
9194 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9195 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9196 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
9197 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
9198 ; RV64ZVE32F-NEXT: andi a3, a2, 8
9199 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_6
9200 ; RV64ZVE32F-NEXT: .LBB79_13: # %cond.load7
9201 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9202 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9203 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9204 ; RV64ZVE32F-NEXT: and a3, a3, a1
9205 ; RV64ZVE32F-NEXT: slli a3, a3, 2
9206 ; RV64ZVE32F-NEXT: add a3, a0, a3
9207 ; RV64ZVE32F-NEXT: flw fa5, 0(a3)
9208 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9209 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9210 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
9211 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
9212 ; RV64ZVE32F-NEXT: andi a3, a2, 16
9213 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_7
9214 ; RV64ZVE32F-NEXT: .LBB79_14: # %cond.load10
9215 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9216 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
9217 ; RV64ZVE32F-NEXT: and a3, a3, a1
9218 ; RV64ZVE32F-NEXT: slli a3, a3, 2
9219 ; RV64ZVE32F-NEXT: add a3, a0, a3
9220 ; RV64ZVE32F-NEXT: flw fa5, 0(a3)
9221 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
9222 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9223 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
9224 ; RV64ZVE32F-NEXT: andi a3, a2, 32
9225 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_8
9226 ; RV64ZVE32F-NEXT: j .LBB79_9
9227 ; RV64ZVE32F-NEXT: .LBB79_15: # %cond.load16
9228 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
9229 ; RV64ZVE32F-NEXT: and a3, a3, a1
9230 ; RV64ZVE32F-NEXT: slli a3, a3, 2
9231 ; RV64ZVE32F-NEXT: add a3, a0, a3
9232 ; RV64ZVE32F-NEXT: flw fa5, 0(a3)
9233 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9234 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9235 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
9236 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
9237 ; RV64ZVE32F-NEXT: andi a2, a2, -128
9238 ; RV64ZVE32F-NEXT: beqz a2, .LBB79_11
9239 ; RV64ZVE32F-NEXT: .LBB79_16: # %cond.load19
9240 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
9241 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9242 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9243 ; RV64ZVE32F-NEXT: and a1, a2, a1
9244 ; RV64ZVE32F-NEXT: slli a1, a1, 2
9245 ; RV64ZVE32F-NEXT: add a0, a0, a1
9246 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
9247 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9248 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9249 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9250 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
9251 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
9252 ; RV64ZVE32F-NEXT: ret
9253 %eidxs = zext <8 x i16> %idxs to <8 x i32>
9254 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
9255 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
9256 ret <8 x float> %v
9257 }
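; Masked gather of <8 x float> with native <8 x i32> indices; RV64V sign-extends them to 64 bits for vluxei64.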
9259 define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x float> %passthru) {
9260 ; RV32-LABEL: mgather_baseidx_v8f32:
9262 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
9263 ; RV32-NEXT: vsll.vi v8, v8, 2
9264 ; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
9265 ; RV32-NEXT: vmv.v.v v8, v10
9268 ; RV64V-LABEL: mgather_baseidx_v8f32:
9270 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
9271 ; RV64V-NEXT: vsext.vf2 v12, v8
9272 ; RV64V-NEXT: vsll.vi v12, v12, 2
9273 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
9274 ; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
9275 ; RV64V-NEXT: vmv.v.v v8, v10
9278 ; RV64ZVE32F-LABEL: mgather_baseidx_v8f32:
9279 ; RV64ZVE32F: # %bb.0:
9280 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9281 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
9282 ; RV64ZVE32F-NEXT: andi a2, a1, 1
9283 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
9284 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
9285 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
9286 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9287 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9288 ; RV64ZVE32F-NEXT: add a2, a0, a2
9289 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9290 ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
9291 ; RV64ZVE32F-NEXT: .LBB80_2: # %else
9292 ; RV64ZVE32F-NEXT: andi a2, a1, 2
9293 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_4
9294 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
9295 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9296 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
9297 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
9298 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9299 ; RV64ZVE32F-NEXT: add a2, a0, a2
9300 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9301 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9302 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
9303 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
9304 ; RV64ZVE32F-NEXT: .LBB80_4: # %else2
9305 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
9306 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
9307 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
9308 ; RV64ZVE32F-NEXT: andi a2, a1, 4
9309 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
9310 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_12
9311 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
9312 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9313 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_13
9314 ; RV64ZVE32F-NEXT: .LBB80_6: # %else8
9315 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9316 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_14
9317 ; RV64ZVE32F-NEXT: .LBB80_7: # %else11
9318 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9319 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_9
9320 ; RV64ZVE32F-NEXT: .LBB80_8: # %cond.load13
9321 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9322 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
9323 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9324 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9325 ; RV64ZVE32F-NEXT: add a2, a0, a2
9326 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9327 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9328 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
9329 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
9330 ; RV64ZVE32F-NEXT: .LBB80_9: # %else14
9331 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
9332 ; RV64ZVE32F-NEXT: andi a2, a1, 64
9333 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2
9334 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_15
9335 ; RV64ZVE32F-NEXT: # %bb.10: # %else17
9336 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9337 ; RV64ZVE32F-NEXT: bnez a1, .LBB80_16
9338 ; RV64ZVE32F-NEXT: .LBB80_11: # %else20
9339 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
9340 ; RV64ZVE32F-NEXT: ret
9341 ; RV64ZVE32F-NEXT: .LBB80_12: # %cond.load4
9342 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9343 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9344 ; RV64ZVE32F-NEXT: add a2, a0, a2
9345 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9346 ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
9347 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
9348 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 2
9349 ; RV64ZVE32F-NEXT: andi a2, a1, 8
9350 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_6
9351 ; RV64ZVE32F-NEXT: .LBB80_13: # %cond.load7
9352 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9353 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9354 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9355 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9356 ; RV64ZVE32F-NEXT: add a2, a0, a2
9357 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9358 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9359 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
9360 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
9361 ; RV64ZVE32F-NEXT: andi a2, a1, 16
9362 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_7
9363 ; RV64ZVE32F-NEXT: .LBB80_14: # %cond.load10
9364 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
9365 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
9366 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9367 ; RV64ZVE32F-NEXT: add a2, a0, a2
9368 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9369 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9370 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
9371 ; RV64ZVE32F-NEXT: andi a2, a1, 32
9372 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_8
9373 ; RV64ZVE32F-NEXT: j .LBB80_9
9374 ; RV64ZVE32F-NEXT: .LBB80_15: # %cond.load16
9375 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9376 ; RV64ZVE32F-NEXT: slli a2, a2, 2
9377 ; RV64ZVE32F-NEXT: add a2, a0, a2
9378 ; RV64ZVE32F-NEXT: flw fa5, 0(a2)
9379 ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
9380 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
9381 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
9382 ; RV64ZVE32F-NEXT: andi a1, a1, -128
9383 ; RV64ZVE32F-NEXT: beqz a1, .LBB80_11
9384 ; RV64ZVE32F-NEXT: .LBB80_16: # %cond.load19
9385 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9386 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9387 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
9388 ; RV64ZVE32F-NEXT: slli a1, a1, 2
9389 ; RV64ZVE32F-NEXT: add a0, a0, a1
9390 ; RV64ZVE32F-NEXT: flw fa5, 0(a0)
9391 ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
9392 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9393 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
9394 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
9395 ; RV64ZVE32F-NEXT: ret
9396 %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
9397 %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
9398 ret <8 x float> %v
9399 }
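; Double-precision gathers follow. Zve32f has no 64-bit vector elements, so both ZVE32F configurations lower these to scalar fld/fsd sequences.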
9401 declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>)
9403 define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %passthru) {
9404 ; RV32V-LABEL: mgather_v1f64:
9406 ; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
9407 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
9408 ; RV32V-NEXT: vmv.v.v v8, v9
9411 ; RV64V-LABEL: mgather_v1f64:
9413 ; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
9414 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
9415 ; RV64V-NEXT: vmv.v.v v8, v9
9418 ; RV32ZVE32F-LABEL: mgather_v1f64:
9419 ; RV32ZVE32F: # %bb.0:
9420 ; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
9421 ; RV32ZVE32F-NEXT: vfirst.m a0, v0
9422 ; RV32ZVE32F-NEXT: bnez a0, .LBB81_2
9423 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
9424 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9425 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9426 ; RV32ZVE32F-NEXT: fld fa0, 0(a0)
9427 ; RV32ZVE32F-NEXT: .LBB81_2: # %else
9428 ; RV32ZVE32F-NEXT: ret
9430 ; RV64ZVE32F-LABEL: mgather_v1f64:
9431 ; RV64ZVE32F: # %bb.0:
9432 ; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
9433 ; RV64ZVE32F-NEXT: vfirst.m a1, v0
9434 ; RV64ZVE32F-NEXT: bnez a1, .LBB81_2
9435 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
9436 ; RV64ZVE32F-NEXT: fld fa0, 0(a0)
9437 ; RV64ZVE32F-NEXT: .LBB81_2: # %else
9438 ; RV64ZVE32F-NEXT: ret
9439 %v = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x double> %passthru)
9440 ret <1 x double> %v
9441 }
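; <2 x double> gather, fully scalarized on the ZVE32F configurations.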
9443 declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>)
9445 define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %passthru) {
9446 ; RV32V-LABEL: mgather_v2f64:
9448 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
9449 ; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
9450 ; RV32V-NEXT: vmv.v.v v8, v9
9453 ; RV64V-LABEL: mgather_v2f64:
9455 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
9456 ; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
9457 ; RV64V-NEXT: vmv.v.v v8, v9
9460 ; RV32ZVE32F-LABEL: mgather_v2f64:
9461 ; RV32ZVE32F: # %bb.0:
9462 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9463 ; RV32ZVE32F-NEXT: vmv.x.s a0, v0
9464 ; RV32ZVE32F-NEXT: andi a1, a0, 1
9465 ; RV32ZVE32F-NEXT: bnez a1, .LBB82_3
9466 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9467 ; RV32ZVE32F-NEXT: andi a0, a0, 2
9468 ; RV32ZVE32F-NEXT: bnez a0, .LBB82_4
9469 ; RV32ZVE32F-NEXT: .LBB82_2: # %else2
9470 ; RV32ZVE32F-NEXT: ret
9471 ; RV32ZVE32F-NEXT: .LBB82_3: # %cond.load
9472 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9473 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
9474 ; RV32ZVE32F-NEXT: fld fa0, 0(a1)
9475 ; RV32ZVE32F-NEXT: andi a0, a0, 2
9476 ; RV32ZVE32F-NEXT: beqz a0, .LBB82_2
9477 ; RV32ZVE32F-NEXT: .LBB82_4: # %cond.load1
9478 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9479 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
9480 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
9481 ; RV32ZVE32F-NEXT: fld fa1, 0(a0)
9482 ; RV32ZVE32F-NEXT: ret
9484 ; RV64ZVE32F-LABEL: mgather_v2f64:
9485 ; RV64ZVE32F: # %bb.0:
9486 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9487 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
9488 ; RV64ZVE32F-NEXT: andi a3, a2, 1
9489 ; RV64ZVE32F-NEXT: bnez a3, .LBB82_3
9490 ; RV64ZVE32F-NEXT: # %bb.1: # %else
9491 ; RV64ZVE32F-NEXT: andi a2, a2, 2
9492 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_4
9493 ; RV64ZVE32F-NEXT: .LBB82_2: # %else2
9494 ; RV64ZVE32F-NEXT: ret
9495 ; RV64ZVE32F-NEXT: .LBB82_3: # %cond.load
9496 ; RV64ZVE32F-NEXT: fld fa0, 0(a0)
9497 ; RV64ZVE32F-NEXT: andi a2, a2, 2
9498 ; RV64ZVE32F-NEXT: beqz a2, .LBB82_2
9499 ; RV64ZVE32F-NEXT: .LBB82_4: # %cond.load1
9500 ; RV64ZVE32F-NEXT: fld fa1, 0(a1)
9501 ; RV64ZVE32F-NEXT: ret
9502 %v = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x double> %passthru)
9503 ret <2 x double> %v
9504 }
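; <4 x double> gather; the ZVE32F lowerings return the result indirectly through a0, storing each element with fsd.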
9506 declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>)
9508 define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %passthru) {
9509 ; RV32V-LABEL: mgather_v4f64:
9511 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
9512 ; RV32V-NEXT: vluxei32.v v10, (zero), v8, v0.t
9513 ; RV32V-NEXT: vmv.v.v v8, v10
9516 ; RV64V-LABEL: mgather_v4f64:
9518 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
9519 ; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
9520 ; RV64V-NEXT: vmv.v.v v8, v10
9523 ; RV32ZVE32F-LABEL: mgather_v4f64:
9524 ; RV32ZVE32F: # %bb.0:
9525 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9526 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9527 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9528 ; RV32ZVE32F-NEXT: bnez a2, .LBB83_6
9529 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9530 ; RV32ZVE32F-NEXT: andi a2, a1, 2
9531 ; RV32ZVE32F-NEXT: bnez a2, .LBB83_7
9532 ; RV32ZVE32F-NEXT: .LBB83_2: # %else2
9533 ; RV32ZVE32F-NEXT: andi a2, a1, 4
9534 ; RV32ZVE32F-NEXT: bnez a2, .LBB83_8
9535 ; RV32ZVE32F-NEXT: .LBB83_3: # %else5
9536 ; RV32ZVE32F-NEXT: andi a1, a1, 8
9537 ; RV32ZVE32F-NEXT: beqz a1, .LBB83_5
9538 ; RV32ZVE32F-NEXT: .LBB83_4: # %cond.load7
9539 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9540 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
9541 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
9542 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
9543 ; RV32ZVE32F-NEXT: .LBB83_5: # %else8
9544 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9545 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
9546 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
9547 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
9548 ; RV32ZVE32F-NEXT: ret
9549 ; RV32ZVE32F-NEXT: .LBB83_6: # %cond.load
9550 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9551 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
9552 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
9553 ; RV32ZVE32F-NEXT: andi a2, a1, 2
9554 ; RV32ZVE32F-NEXT: beqz a2, .LBB83_2
9555 ; RV32ZVE32F-NEXT: .LBB83_7: # %cond.load1
9556 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9557 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9558 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9
9559 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
9560 ; RV32ZVE32F-NEXT: andi a2, a1, 4
9561 ; RV32ZVE32F-NEXT: beqz a2, .LBB83_3
9562 ; RV32ZVE32F-NEXT: .LBB83_8: # %cond.load4
9563 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9564 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
9565 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9
9566 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
9567 ; RV32ZVE32F-NEXT: andi a1, a1, 8
9568 ; RV32ZVE32F-NEXT: bnez a1, .LBB83_4
9569 ; RV32ZVE32F-NEXT: j .LBB83_5
9571 ; RV64ZVE32F-LABEL: mgather_v4f64:
9572 ; RV64ZVE32F: # %bb.0:
9573 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9574 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
9575 ; RV64ZVE32F-NEXT: andi a3, a2, 1
9576 ; RV64ZVE32F-NEXT: bnez a3, .LBB83_6
9577 ; RV64ZVE32F-NEXT: # %bb.1: # %else
9578 ; RV64ZVE32F-NEXT: andi a3, a2, 2
9579 ; RV64ZVE32F-NEXT: bnez a3, .LBB83_7
9580 ; RV64ZVE32F-NEXT: .LBB83_2: # %else2
9581 ; RV64ZVE32F-NEXT: andi a3, a2, 4
9582 ; RV64ZVE32F-NEXT: bnez a3, .LBB83_8
9583 ; RV64ZVE32F-NEXT: .LBB83_3: # %else5
9584 ; RV64ZVE32F-NEXT: andi a2, a2, 8
9585 ; RV64ZVE32F-NEXT: beqz a2, .LBB83_5
9586 ; RV64ZVE32F-NEXT: .LBB83_4: # %cond.load7
9587 ; RV64ZVE32F-NEXT: ld a1, 24(a1)
9588 ; RV64ZVE32F-NEXT: fld fa3, 0(a1)
9589 ; RV64ZVE32F-NEXT: .LBB83_5: # %else8
9590 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
9591 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
9592 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
9593 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
9594 ; RV64ZVE32F-NEXT: ret
9595 ; RV64ZVE32F-NEXT: .LBB83_6: # %cond.load
9596 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
9597 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
9598 ; RV64ZVE32F-NEXT: andi a3, a2, 2
9599 ; RV64ZVE32F-NEXT: beqz a3, .LBB83_2
9600 ; RV64ZVE32F-NEXT: .LBB83_7: # %cond.load1
9601 ; RV64ZVE32F-NEXT: ld a3, 8(a1)
9602 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
9603 ; RV64ZVE32F-NEXT: andi a3, a2, 4
9604 ; RV64ZVE32F-NEXT: beqz a3, .LBB83_3
9605 ; RV64ZVE32F-NEXT: .LBB83_8: # %cond.load4
9606 ; RV64ZVE32F-NEXT: ld a3, 16(a1)
9607 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
9608 ; RV64ZVE32F-NEXT: andi a2, a2, 8
9609 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_4
9610 ; RV64ZVE32F-NEXT: j .LBB83_5
9611 %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x double> %passthru)
9612 ret <4 x double> %v
9613 }
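; All-ones mask: the V configurations emit an unmasked vluxei, while the ZVE32F paths still branch on the lanes of a vmset.m result.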
9615 define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) {
9616 ; RV32V-LABEL: mgather_truemask_v4f64:
9618 ; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
9619 ; RV32V-NEXT: vluxei32.v v10, (zero), v8
9620 ; RV32V-NEXT: vmv.v.v v8, v10
9623 ; RV64V-LABEL: mgather_truemask_v4f64:
9625 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
9626 ; RV64V-NEXT: vluxei64.v v8, (zero), v8
9629 ; RV32ZVE32F-LABEL: mgather_truemask_v4f64:
9630 ; RV32ZVE32F: # %bb.0:
9631 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
9632 ; RV32ZVE32F-NEXT: vmset.m v9
9633 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
9634 ; RV32ZVE32F-NEXT: beqz zero, .LBB84_6
9635 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9636 ; RV32ZVE32F-NEXT: andi a2, a1, 2
9637 ; RV32ZVE32F-NEXT: bnez a2, .LBB84_7
9638 ; RV32ZVE32F-NEXT: .LBB84_2: # %else2
9639 ; RV32ZVE32F-NEXT: andi a2, a1, 4
9640 ; RV32ZVE32F-NEXT: bnez a2, .LBB84_8
9641 ; RV32ZVE32F-NEXT: .LBB84_3: # %else5
9642 ; RV32ZVE32F-NEXT: andi a1, a1, 8
9643 ; RV32ZVE32F-NEXT: beqz a1, .LBB84_5
9644 ; RV32ZVE32F-NEXT: .LBB84_4: # %cond.load7
9645 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9646 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
9647 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
9648 ; RV32ZVE32F-NEXT: fld fa3, 0(a1)
9649 ; RV32ZVE32F-NEXT: .LBB84_5: # %else8
9650 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9651 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
9652 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
9653 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
9654 ; RV32ZVE32F-NEXT: ret
9655 ; RV32ZVE32F-NEXT: .LBB84_6: # %cond.load
9656 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9657 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
9658 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
9659 ; RV32ZVE32F-NEXT: andi a2, a1, 2
9660 ; RV32ZVE32F-NEXT: beqz a2, .LBB84_2
9661 ; RV32ZVE32F-NEXT: .LBB84_7: # %cond.load1
9662 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9663 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
9664 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9
9665 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
9666 ; RV32ZVE32F-NEXT: andi a2, a1, 4
9667 ; RV32ZVE32F-NEXT: beqz a2, .LBB84_3
9668 ; RV32ZVE32F-NEXT: .LBB84_8: # %cond.load4
9669 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9670 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
9671 ; RV32ZVE32F-NEXT: vmv.x.s a2, v9
9672 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
9673 ; RV32ZVE32F-NEXT: andi a1, a1, 8
9674 ; RV32ZVE32F-NEXT: bnez a1, .LBB84_4
9675 ; RV32ZVE32F-NEXT: j .LBB84_5
9677 ; RV64ZVE32F-LABEL: mgather_truemask_v4f64:
9678 ; RV64ZVE32F: # %bb.0:
9679 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
9680 ; RV64ZVE32F-NEXT: vmset.m v8
9681 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
9682 ; RV64ZVE32F-NEXT: beqz zero, .LBB84_6
9683 ; RV64ZVE32F-NEXT: # %bb.1: # %else
9684 ; RV64ZVE32F-NEXT: andi a3, a2, 2
9685 ; RV64ZVE32F-NEXT: bnez a3, .LBB84_7
9686 ; RV64ZVE32F-NEXT: .LBB84_2: # %else2
9687 ; RV64ZVE32F-NEXT: andi a3, a2, 4
9688 ; RV64ZVE32F-NEXT: bnez a3, .LBB84_8
9689 ; RV64ZVE32F-NEXT: .LBB84_3: # %else5
9690 ; RV64ZVE32F-NEXT: andi a2, a2, 8
9691 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_5
9692 ; RV64ZVE32F-NEXT: .LBB84_4: # %cond.load7
9693 ; RV64ZVE32F-NEXT: ld a1, 24(a1)
9694 ; RV64ZVE32F-NEXT: fld fa3, 0(a1)
9695 ; RV64ZVE32F-NEXT: .LBB84_5: # %else8
9696 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
9697 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
9698 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
9699 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
9700 ; RV64ZVE32F-NEXT: ret
9701 ; RV64ZVE32F-NEXT: .LBB84_6: # %cond.load
9702 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
9703 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
9704 ; RV64ZVE32F-NEXT: andi a3, a2, 2
9705 ; RV64ZVE32F-NEXT: beqz a3, .LBB84_2
9706 ; RV64ZVE32F-NEXT: .LBB84_7: # %cond.load1
9707 ; RV64ZVE32F-NEXT: ld a3, 8(a1)
9708 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
9709 ; RV64ZVE32F-NEXT: andi a3, a2, 4
9710 ; RV64ZVE32F-NEXT: beqz a3, .LBB84_3
9711 ; RV64ZVE32F-NEXT: .LBB84_8: # %cond.load4
9712 ; RV64ZVE32F-NEXT: ld a3, 16(a1)
9713 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
9714 ; RV64ZVE32F-NEXT: andi a2, a2, 8
9715 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_4
9716 ; RV64ZVE32F-NEXT: j .LBB84_5
9717 %mhead = insertelement <4 x i1> poison, i1 1, i32 0
9718 %mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
9719 %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue, <4 x double> %passthru)
9720 ret <4 x double> %v
9721 }
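; All-zeroes mask: the gather folds to the passthru operand.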
9723 define <4 x double> @mgather_falsemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) {
9724 ; RV32V-LABEL: mgather_falsemask_v4f64:
9726 ; RV32V-NEXT: vmv2r.v v8, v10
9729 ; RV64V-LABEL: mgather_falsemask_v4f64:
9731 ; RV64V-NEXT: vmv2r.v v8, v10
9734 ; RV32ZVE32F-LABEL: mgather_falsemask_v4f64:
9735 ; RV32ZVE32F: # %bb.0:
9736 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
9737 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
9738 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
9739 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9740 ; RV32ZVE32F-NEXT: ret
9742 ; RV64ZVE32F-LABEL: mgather_falsemask_v4f64:
9743 ; RV64ZVE32F: # %bb.0:
9744 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
9745 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
9746 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
9747 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
9748 ; RV64ZVE32F-NEXT: ret
9749 %v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x double> %passthru)
9750 ret <4 x double> %v
9751 }
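; <8 x double> gather; RV32ZVE32F extracts each 32-bit pointer from the vector operand and loads the elements into fa0-fa7.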
9753 declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>)
9755 define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %passthru) {
9756 ; RV32V-LABEL: mgather_v8f64:
9758 ; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
9759 ; RV32V-NEXT: vluxei32.v v12, (zero), v8, v0.t
9760 ; RV32V-NEXT: vmv.v.v v8, v12
9763 ; RV64V-LABEL: mgather_v8f64:
9765 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
9766 ; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
9767 ; RV64V-NEXT: vmv.v.v v8, v12
9770 ; RV32ZVE32F-LABEL: mgather_v8f64:
9771 ; RV32ZVE32F: # %bb.0:
9772 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9773 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9774 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9775 ; RV32ZVE32F-NEXT: bnez a2, .LBB86_10
9776 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9777 ; RV32ZVE32F-NEXT: andi a2, a1, 2
9778 ; RV32ZVE32F-NEXT: bnez a2, .LBB86_11
9779 ; RV32ZVE32F-NEXT: .LBB86_2: # %else2
9780 ; RV32ZVE32F-NEXT: andi a2, a1, 4
9781 ; RV32ZVE32F-NEXT: bnez a2, .LBB86_12
9782 ; RV32ZVE32F-NEXT: .LBB86_3: # %else5
9783 ; RV32ZVE32F-NEXT: andi a2, a1, 8
9784 ; RV32ZVE32F-NEXT: bnez a2, .LBB86_13
9785 ; RV32ZVE32F-NEXT: .LBB86_4: # %else8
9786 ; RV32ZVE32F-NEXT: andi a2, a1, 16
9787 ; RV32ZVE32F-NEXT: bnez a2, .LBB86_14
9788 ; RV32ZVE32F-NEXT: .LBB86_5: # %else11
9789 ; RV32ZVE32F-NEXT: andi a2, a1, 32
9790 ; RV32ZVE32F-NEXT: bnez a2, .LBB86_15
9791 ; RV32ZVE32F-NEXT: .LBB86_6: # %else14
9792 ; RV32ZVE32F-NEXT: andi a2, a1, 64
9793 ; RV32ZVE32F-NEXT: bnez a2, .LBB86_16
9794 ; RV32ZVE32F-NEXT: .LBB86_7: # %else17
9795 ; RV32ZVE32F-NEXT: andi a1, a1, -128
9796 ; RV32ZVE32F-NEXT: beqz a1, .LBB86_9
9797 ; RV32ZVE32F-NEXT: .LBB86_8: # %cond.load19
9798 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9799 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9800 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
9801 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
9802 ; RV32ZVE32F-NEXT: .LBB86_9: # %else20
9803 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
9804 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
9805 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
9806 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
9807 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
9808 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
9809 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
9810 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
9811 ; RV32ZVE32F-NEXT: ret
9812 ; RV32ZVE32F-NEXT: .LBB86_10: # %cond.load
9813 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
9814 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
9815 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
9816 ; RV32ZVE32F-NEXT: andi a2, a1, 2
9817 ; RV32ZVE32F-NEXT: beqz a2, .LBB86_2
9818 ; RV32ZVE32F-NEXT: .LBB86_11: # %cond.load1
9819 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9820 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
9821 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
9822 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
9823 ; RV32ZVE32F-NEXT: andi a2, a1, 4
9824 ; RV32ZVE32F-NEXT: beqz a2, .LBB86_3
9825 ; RV32ZVE32F-NEXT: .LBB86_12: # %cond.load4
9826 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9827 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
9828 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
9829 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
9830 ; RV32ZVE32F-NEXT: andi a2, a1, 8
9831 ; RV32ZVE32F-NEXT: beqz a2, .LBB86_4
9832 ; RV32ZVE32F-NEXT: .LBB86_13: # %cond.load7
9833 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
9834 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
9835 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
9836 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
9837 ; RV32ZVE32F-NEXT: andi a2, a1, 16
9838 ; RV32ZVE32F-NEXT: beqz a2, .LBB86_5
9839 ; RV32ZVE32F-NEXT: .LBB86_14: # %cond.load10
9840 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9841 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
9842 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
9843 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
9844 ; RV32ZVE32F-NEXT: andi a2, a1, 32
9845 ; RV32ZVE32F-NEXT: beqz a2, .LBB86_6
9846 ; RV32ZVE32F-NEXT: .LBB86_15: # %cond.load13
9847 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9848 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
9849 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
9850 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
9851 ; RV32ZVE32F-NEXT: andi a2, a1, 64
9852 ; RV32ZVE32F-NEXT: beqz a2, .LBB86_7
9853 ; RV32ZVE32F-NEXT: .LBB86_16: # %cond.load16
9854 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9855 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
9856 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
9857 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
9858 ; RV32ZVE32F-NEXT: andi a1, a1, -128
9859 ; RV32ZVE32F-NEXT: bnez a1, .LBB86_8
9860 ; RV32ZVE32F-NEXT: j .LBB86_9
9862 ; RV64ZVE32F-LABEL: mgather_v8f64:
9863 ; RV64ZVE32F: # %bb.0:
9864 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9865 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
9866 ; RV64ZVE32F-NEXT: andi a3, a2, 1
9867 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_10
9868 ; RV64ZVE32F-NEXT: # %bb.1: # %else
9869 ; RV64ZVE32F-NEXT: andi a3, a2, 2
9870 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_11
9871 ; RV64ZVE32F-NEXT: .LBB86_2: # %else2
9872 ; RV64ZVE32F-NEXT: andi a3, a2, 4
9873 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_12
9874 ; RV64ZVE32F-NEXT: .LBB86_3: # %else5
9875 ; RV64ZVE32F-NEXT: andi a3, a2, 8
9876 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_13
9877 ; RV64ZVE32F-NEXT: .LBB86_4: # %else8
9878 ; RV64ZVE32F-NEXT: andi a3, a2, 16
9879 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_14
9880 ; RV64ZVE32F-NEXT: .LBB86_5: # %else11
9881 ; RV64ZVE32F-NEXT: andi a3, a2, 32
9882 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_15
9883 ; RV64ZVE32F-NEXT: .LBB86_6: # %else14
9884 ; RV64ZVE32F-NEXT: andi a3, a2, 64
9885 ; RV64ZVE32F-NEXT: bnez a3, .LBB86_16
9886 ; RV64ZVE32F-NEXT: .LBB86_7: # %else17
9887 ; RV64ZVE32F-NEXT: andi a2, a2, -128
9888 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_9
9889 ; RV64ZVE32F-NEXT: .LBB86_8: # %cond.load19
9890 ; RV64ZVE32F-NEXT: ld a1, 56(a1)
9891 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
9892 ; RV64ZVE32F-NEXT: .LBB86_9: # %else20
9893 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
9894 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
9895 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
9896 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
9897 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
9898 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
9899 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
9900 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
9901 ; RV64ZVE32F-NEXT: ret
9902 ; RV64ZVE32F-NEXT: .LBB86_10: # %cond.load
9903 ; RV64ZVE32F-NEXT: ld a3, 0(a1)
9904 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
9905 ; RV64ZVE32F-NEXT: andi a3, a2, 2
9906 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_2
9907 ; RV64ZVE32F-NEXT: .LBB86_11: # %cond.load1
9908 ; RV64ZVE32F-NEXT: ld a3, 8(a1)
9909 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
9910 ; RV64ZVE32F-NEXT: andi a3, a2, 4
9911 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_3
9912 ; RV64ZVE32F-NEXT: .LBB86_12: # %cond.load4
9913 ; RV64ZVE32F-NEXT: ld a3, 16(a1)
9914 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
9915 ; RV64ZVE32F-NEXT: andi a3, a2, 8
9916 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_4
9917 ; RV64ZVE32F-NEXT: .LBB86_13: # %cond.load7
9918 ; RV64ZVE32F-NEXT: ld a3, 24(a1)
9919 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
9920 ; RV64ZVE32F-NEXT: andi a3, a2, 16
9921 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_5
9922 ; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load10
9923 ; RV64ZVE32F-NEXT: ld a3, 32(a1)
9924 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
9925 ; RV64ZVE32F-NEXT: andi a3, a2, 32
9926 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_6
9927 ; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load13
9928 ; RV64ZVE32F-NEXT: ld a3, 40(a1)
9929 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
9930 ; RV64ZVE32F-NEXT: andi a3, a2, 64
9931 ; RV64ZVE32F-NEXT: beqz a3, .LBB86_7
9932 ; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load16
9933 ; RV64ZVE32F-NEXT: ld a3, 48(a1)
9934 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
9935 ; RV64ZVE32F-NEXT: andi a2, a2, -128
9936 ; RV64ZVE32F-NEXT: bnez a2, .LBB86_8
9937 ; RV64ZVE32F-NEXT: j .LBB86_9
9938 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
9939 ret <8 x double> %v
9940 }
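; Note the contrast above: RV32ZVE32F keeps the 32-bit pointers in a vector register and
; extracts them with vslidedown, while RV64ZVE32F has no 64-bit vector elements and reloads
; each pointer from the in-memory %ptrs argument before the scalar fld.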
9942 define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
9943 ; RV32V-LABEL: mgather_baseidx_v8i8_v8f64:
9945 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9946 ; RV32V-NEXT: vsext.vf4 v10, v8
9947 ; RV32V-NEXT: vsll.vi v8, v10, 3
9948 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
9949 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
9950 ; RV32V-NEXT: vmv.v.v v8, v12
9953 ; RV64V-LABEL: mgather_baseidx_v8i8_v8f64:
9955 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
9956 ; RV64V-NEXT: vsext.vf8 v16, v8
9957 ; RV64V-NEXT: vsll.vi v8, v16, 3
9958 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
9959 ; RV64V-NEXT: vmv.v.v v8, v12
9962 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
9963 ; RV32ZVE32F: # %bb.0:
9964 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
9965 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
9966 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
9967 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
9968 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
9969 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
9970 ; RV32ZVE32F-NEXT: andi a2, a1, 1
9971 ; RV32ZVE32F-NEXT: bnez a2, .LBB87_10
9972 ; RV32ZVE32F-NEXT: # %bb.1: # %else
9973 ; RV32ZVE32F-NEXT: andi a2, a1, 2
9974 ; RV32ZVE32F-NEXT: bnez a2, .LBB87_11
9975 ; RV32ZVE32F-NEXT: .LBB87_2: # %else2
9976 ; RV32ZVE32F-NEXT: andi a2, a1, 4
9977 ; RV32ZVE32F-NEXT: bnez a2, .LBB87_12
9978 ; RV32ZVE32F-NEXT: .LBB87_3: # %else5
9979 ; RV32ZVE32F-NEXT: andi a2, a1, 8
9980 ; RV32ZVE32F-NEXT: bnez a2, .LBB87_13
9981 ; RV32ZVE32F-NEXT: .LBB87_4: # %else8
9982 ; RV32ZVE32F-NEXT: andi a2, a1, 16
9983 ; RV32ZVE32F-NEXT: bnez a2, .LBB87_14
9984 ; RV32ZVE32F-NEXT: .LBB87_5: # %else11
9985 ; RV32ZVE32F-NEXT: andi a2, a1, 32
9986 ; RV32ZVE32F-NEXT: bnez a2, .LBB87_15
9987 ; RV32ZVE32F-NEXT: .LBB87_6: # %else14
9988 ; RV32ZVE32F-NEXT: andi a2, a1, 64
9989 ; RV32ZVE32F-NEXT: bnez a2, .LBB87_16
9990 ; RV32ZVE32F-NEXT: .LBB87_7: # %else17
9991 ; RV32ZVE32F-NEXT: andi a1, a1, -128
9992 ; RV32ZVE32F-NEXT: beqz a1, .LBB87_9
9993 ; RV32ZVE32F-NEXT: .LBB87_8: # %cond.load19
9994 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
9995 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
9996 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
9997 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
9998 ; RV32ZVE32F-NEXT: .LBB87_9: # %else20
9999 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
10000 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
10001 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
10002 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
10003 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
10004 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
10005 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
10006 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
10007 ; RV32ZVE32F-NEXT: ret
10008 ; RV32ZVE32F-NEXT: .LBB87_10: # %cond.load
10009 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10010 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
10011 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
10012 ; RV32ZVE32F-NEXT: andi a2, a1, 2
10013 ; RV32ZVE32F-NEXT: beqz a2, .LBB87_2
10014 ; RV32ZVE32F-NEXT: .LBB87_11: # %cond.load1
10015 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10016 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10017 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10018 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
10019 ; RV32ZVE32F-NEXT: andi a2, a1, 4
10020 ; RV32ZVE32F-NEXT: beqz a2, .LBB87_3
10021 ; RV32ZVE32F-NEXT: .LBB87_12: # %cond.load4
10022 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10023 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10024 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10025 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
10026 ; RV32ZVE32F-NEXT: andi a2, a1, 8
10027 ; RV32ZVE32F-NEXT: beqz a2, .LBB87_4
10028 ; RV32ZVE32F-NEXT: .LBB87_13: # %cond.load7
10029 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10030 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10031 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10032 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
10033 ; RV32ZVE32F-NEXT: andi a2, a1, 16
10034 ; RV32ZVE32F-NEXT: beqz a2, .LBB87_5
10035 ; RV32ZVE32F-NEXT: .LBB87_14: # %cond.load10
10036 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10037 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10038 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10039 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
10040 ; RV32ZVE32F-NEXT: andi a2, a1, 32
10041 ; RV32ZVE32F-NEXT: beqz a2, .LBB87_6
10042 ; RV32ZVE32F-NEXT: .LBB87_15: # %cond.load13
10043 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10044 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10045 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10046 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
10047 ; RV32ZVE32F-NEXT: andi a2, a1, 64
10048 ; RV32ZVE32F-NEXT: beqz a2, .LBB87_7
10049 ; RV32ZVE32F-NEXT: .LBB87_16: # %cond.load16
10050 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10051 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10052 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10053 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
10054 ; RV32ZVE32F-NEXT: andi a1, a1, -128
10055 ; RV32ZVE32F-NEXT: bnez a1, .LBB87_8
10056 ; RV32ZVE32F-NEXT: j .LBB87_9
10058 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
10059 ; RV64ZVE32F: # %bb.0:
10060 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10061 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
10062 ; RV64ZVE32F-NEXT: andi a3, a2, 1
10063 ; RV64ZVE32F-NEXT: beqz a3, .LBB87_2
10064 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
10065 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10066 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10067 ; RV64ZVE32F-NEXT: add a3, a1, a3
10068 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
10069 ; RV64ZVE32F-NEXT: .LBB87_2: # %else
10070 ; RV64ZVE32F-NEXT: andi a3, a2, 2
10071 ; RV64ZVE32F-NEXT: beqz a3, .LBB87_4
10072 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
10073 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10074 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
10075 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
10076 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10077 ; RV64ZVE32F-NEXT: add a3, a1, a3
10078 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
10079 ; RV64ZVE32F-NEXT: .LBB87_4: # %else2
10080 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10081 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
10082 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10083 ; RV64ZVE32F-NEXT: andi a3, a2, 4
10084 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10085 ; RV64ZVE32F-NEXT: bnez a3, .LBB87_14
10086 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
10087 ; RV64ZVE32F-NEXT: andi a3, a2, 8
10088 ; RV64ZVE32F-NEXT: bnez a3, .LBB87_15
10089 ; RV64ZVE32F-NEXT: .LBB87_6: # %else8
10090 ; RV64ZVE32F-NEXT: andi a3, a2, 16
10091 ; RV64ZVE32F-NEXT: bnez a3, .LBB87_16
10092 ; RV64ZVE32F-NEXT: .LBB87_7: # %else11
10093 ; RV64ZVE32F-NEXT: andi a3, a2, 32
10094 ; RV64ZVE32F-NEXT: beqz a3, .LBB87_9
10095 ; RV64ZVE32F-NEXT: .LBB87_8: # %cond.load13
10096 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
10097 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10098 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10099 ; RV64ZVE32F-NEXT: add a3, a1, a3
10100 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
10101 ; RV64ZVE32F-NEXT: .LBB87_9: # %else14
10102 ; RV64ZVE32F-NEXT: andi a3, a2, 64
10103 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
10104 ; RV64ZVE32F-NEXT: beqz a3, .LBB87_11
10105 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
10106 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10107 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10108 ; RV64ZVE32F-NEXT: add a3, a1, a3
10109 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
10110 ; RV64ZVE32F-NEXT: .LBB87_11: # %else17
10111 ; RV64ZVE32F-NEXT: andi a2, a2, -128
10112 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_13
10113 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
10114 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10115 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10116 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10117 ; RV64ZVE32F-NEXT: add a1, a1, a2
10118 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
10119 ; RV64ZVE32F-NEXT: .LBB87_13: # %else20
10120 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
10121 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
10122 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
10123 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
10124 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
10125 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
10126 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
10127 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
10128 ; RV64ZVE32F-NEXT: ret
10129 ; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load4
10130 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10131 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10132 ; RV64ZVE32F-NEXT: add a3, a1, a3
10133 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
10134 ; RV64ZVE32F-NEXT: andi a3, a2, 8
10135 ; RV64ZVE32F-NEXT: beqz a3, .LBB87_6
10136 ; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7
10137 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10138 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10139 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10140 ; RV64ZVE32F-NEXT: add a3, a1, a3
10141 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
10142 ; RV64ZVE32F-NEXT: andi a3, a2, 16
10143 ; RV64ZVE32F-NEXT: beqz a3, .LBB87_7
10144 ; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10
10145 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
10146 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10147 ; RV64ZVE32F-NEXT: add a3, a1, a3
10148 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
10149 ; RV64ZVE32F-NEXT: andi a3, a2, 32
10150 ; RV64ZVE32F-NEXT: bnez a3, .LBB87_8
10151 ; RV64ZVE32F-NEXT: j .LBB87_9
10152 %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
10153 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
10154 ret <8 x double> %v
10155 }
10157 define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
10158 ; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8f64:
10160 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10161 ; RV32V-NEXT: vsext.vf4 v10, v8
10162 ; RV32V-NEXT: vsll.vi v8, v10, 3
10163 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
10164 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
10165 ; RV32V-NEXT: vmv.v.v v8, v12
10168 ; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f64:
10170 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
10171 ; RV64V-NEXT: vsext.vf8 v16, v8
10172 ; RV64V-NEXT: vsll.vi v8, v16, 3
10173 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
10174 ; RV64V-NEXT: vmv.v.v v8, v12
10177 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
10178 ; RV32ZVE32F: # %bb.0:
10179 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10180 ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
10181 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
10182 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
10183 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10184 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
10185 ; RV32ZVE32F-NEXT: andi a2, a1, 1
10186 ; RV32ZVE32F-NEXT: bnez a2, .LBB88_10
10187 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10188 ; RV32ZVE32F-NEXT: andi a2, a1, 2
10189 ; RV32ZVE32F-NEXT: bnez a2, .LBB88_11
10190 ; RV32ZVE32F-NEXT: .LBB88_2: # %else2
10191 ; RV32ZVE32F-NEXT: andi a2, a1, 4
10192 ; RV32ZVE32F-NEXT: bnez a2, .LBB88_12
10193 ; RV32ZVE32F-NEXT: .LBB88_3: # %else5
10194 ; RV32ZVE32F-NEXT: andi a2, a1, 8
10195 ; RV32ZVE32F-NEXT: bnez a2, .LBB88_13
10196 ; RV32ZVE32F-NEXT: .LBB88_4: # %else8
10197 ; RV32ZVE32F-NEXT: andi a2, a1, 16
10198 ; RV32ZVE32F-NEXT: bnez a2, .LBB88_14
10199 ; RV32ZVE32F-NEXT: .LBB88_5: # %else11
10200 ; RV32ZVE32F-NEXT: andi a2, a1, 32
10201 ; RV32ZVE32F-NEXT: bnez a2, .LBB88_15
10202 ; RV32ZVE32F-NEXT: .LBB88_6: # %else14
10203 ; RV32ZVE32F-NEXT: andi a2, a1, 64
10204 ; RV32ZVE32F-NEXT: bnez a2, .LBB88_16
10205 ; RV32ZVE32F-NEXT: .LBB88_7: # %else17
10206 ; RV32ZVE32F-NEXT: andi a1, a1, -128
10207 ; RV32ZVE32F-NEXT: beqz a1, .LBB88_9
10208 ; RV32ZVE32F-NEXT: .LBB88_8: # %cond.load19
10209 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10210 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
10211 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
10212 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
10213 ; RV32ZVE32F-NEXT: .LBB88_9: # %else20
10214 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
10215 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
10216 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
10217 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
10218 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
10219 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
10220 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
10221 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
10222 ; RV32ZVE32F-NEXT: ret
10223 ; RV32ZVE32F-NEXT: .LBB88_10: # %cond.load
10224 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10225 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
10226 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
10227 ; RV32ZVE32F-NEXT: andi a2, a1, 2
10228 ; RV32ZVE32F-NEXT: beqz a2, .LBB88_2
10229 ; RV32ZVE32F-NEXT: .LBB88_11: # %cond.load1
10230 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10231 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10232 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10233 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
10234 ; RV32ZVE32F-NEXT: andi a2, a1, 4
10235 ; RV32ZVE32F-NEXT: beqz a2, .LBB88_3
10236 ; RV32ZVE32F-NEXT: .LBB88_12: # %cond.load4
10237 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10238 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10239 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10240 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
10241 ; RV32ZVE32F-NEXT: andi a2, a1, 8
10242 ; RV32ZVE32F-NEXT: beqz a2, .LBB88_4
10243 ; RV32ZVE32F-NEXT: .LBB88_13: # %cond.load7
10244 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10245 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10246 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10247 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
10248 ; RV32ZVE32F-NEXT: andi a2, a1, 16
10249 ; RV32ZVE32F-NEXT: beqz a2, .LBB88_5
10250 ; RV32ZVE32F-NEXT: .LBB88_14: # %cond.load10
10251 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10252 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10253 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10254 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
10255 ; RV32ZVE32F-NEXT: andi a2, a1, 32
10256 ; RV32ZVE32F-NEXT: beqz a2, .LBB88_6
10257 ; RV32ZVE32F-NEXT: .LBB88_15: # %cond.load13
10258 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10259 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10260 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10261 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
10262 ; RV32ZVE32F-NEXT: andi a2, a1, 64
10263 ; RV32ZVE32F-NEXT: beqz a2, .LBB88_7
10264 ; RV32ZVE32F-NEXT: .LBB88_16: # %cond.load16
10265 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10266 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10267 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10268 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
10269 ; RV32ZVE32F-NEXT: andi a1, a1, -128
10270 ; RV32ZVE32F-NEXT: bnez a1, .LBB88_8
10271 ; RV32ZVE32F-NEXT: j .LBB88_9
10273 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
10274 ; RV64ZVE32F: # %bb.0:
10275 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10276 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
10277 ; RV64ZVE32F-NEXT: andi a3, a2, 1
10278 ; RV64ZVE32F-NEXT: beqz a3, .LBB88_2
10279 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
10280 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10281 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10282 ; RV64ZVE32F-NEXT: add a3, a1, a3
10283 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
10284 ; RV64ZVE32F-NEXT: .LBB88_2: # %else
10285 ; RV64ZVE32F-NEXT: andi a3, a2, 2
10286 ; RV64ZVE32F-NEXT: beqz a3, .LBB88_4
10287 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
10288 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10289 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
10290 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
10291 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10292 ; RV64ZVE32F-NEXT: add a3, a1, a3
10293 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
10294 ; RV64ZVE32F-NEXT: .LBB88_4: # %else2
10295 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10296 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
10297 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10298 ; RV64ZVE32F-NEXT: andi a3, a2, 4
10299 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10300 ; RV64ZVE32F-NEXT: bnez a3, .LBB88_14
10301 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
10302 ; RV64ZVE32F-NEXT: andi a3, a2, 8
10303 ; RV64ZVE32F-NEXT: bnez a3, .LBB88_15
10304 ; RV64ZVE32F-NEXT: .LBB88_6: # %else8
10305 ; RV64ZVE32F-NEXT: andi a3, a2, 16
10306 ; RV64ZVE32F-NEXT: bnez a3, .LBB88_16
10307 ; RV64ZVE32F-NEXT: .LBB88_7: # %else11
10308 ; RV64ZVE32F-NEXT: andi a3, a2, 32
10309 ; RV64ZVE32F-NEXT: beqz a3, .LBB88_9
10310 ; RV64ZVE32F-NEXT: .LBB88_8: # %cond.load13
10311 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
10312 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10313 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10314 ; RV64ZVE32F-NEXT: add a3, a1, a3
10315 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
10316 ; RV64ZVE32F-NEXT: .LBB88_9: # %else14
10317 ; RV64ZVE32F-NEXT: andi a3, a2, 64
10318 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
10319 ; RV64ZVE32F-NEXT: beqz a3, .LBB88_11
10320 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
10321 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10322 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10323 ; RV64ZVE32F-NEXT: add a3, a1, a3
10324 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
10325 ; RV64ZVE32F-NEXT: .LBB88_11: # %else17
10326 ; RV64ZVE32F-NEXT: andi a2, a2, -128
10327 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_13
10328 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
10329 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10330 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10331 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10332 ; RV64ZVE32F-NEXT: add a1, a1, a2
10333 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
10334 ; RV64ZVE32F-NEXT: .LBB88_13: # %else20
10335 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
10336 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
10337 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
10338 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
10339 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
10340 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
10341 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
10342 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
10343 ; RV64ZVE32F-NEXT: ret
10344 ; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load4
10345 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10346 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10347 ; RV64ZVE32F-NEXT: add a3, a1, a3
10348 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
10349 ; RV64ZVE32F-NEXT: andi a3, a2, 8
10350 ; RV64ZVE32F-NEXT: beqz a3, .LBB88_6
10351 ; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7
10352 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10353 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10354 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10355 ; RV64ZVE32F-NEXT: add a3, a1, a3
10356 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
10357 ; RV64ZVE32F-NEXT: andi a3, a2, 16
10358 ; RV64ZVE32F-NEXT: beqz a3, .LBB88_7
10359 ; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10
10360 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
10361 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10362 ; RV64ZVE32F-NEXT: add a3, a1, a3
10363 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
10364 ; RV64ZVE32F-NEXT: andi a3, a2, 32
10365 ; RV64ZVE32F-NEXT: bnez a3, .LBB88_8
10366 ; RV64ZVE32F-NEXT: j .LBB88_9
10367 %eidxs = sext <8 x i8> %idxs to <8 x i64>
10368 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
10369 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
10370 ret <8 x double> %v
10371 }
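; For the zext case below, the extended i8 indices fit in 16 bits, so RV32V and RV64V can
; narrow the index type to e16 and use vluxei16; RV64ZVE32F instead masks each scalar index
; with 255 before scaling.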
10373 define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
10374 ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8f64:
10376 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
10377 ; RV32V-NEXT: vzext.vf2 v9, v8
10378 ; RV32V-NEXT: vsll.vi v8, v9, 3
10379 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
10380 ; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t
10381 ; RV32V-NEXT: vmv.v.v v8, v12
10384 ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f64:
10386 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
10387 ; RV64V-NEXT: vzext.vf2 v9, v8
10388 ; RV64V-NEXT: vsll.vi v8, v9, 3
10389 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
10390 ; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t
10391 ; RV64V-NEXT: vmv.v.v v8, v12
10394 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
10395 ; RV32ZVE32F: # %bb.0:
10396 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10397 ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
10398 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
10399 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
10400 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10401 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
10402 ; RV32ZVE32F-NEXT: andi a2, a1, 1
10403 ; RV32ZVE32F-NEXT: bnez a2, .LBB89_10
10404 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10405 ; RV32ZVE32F-NEXT: andi a2, a1, 2
10406 ; RV32ZVE32F-NEXT: bnez a2, .LBB89_11
10407 ; RV32ZVE32F-NEXT: .LBB89_2: # %else2
10408 ; RV32ZVE32F-NEXT: andi a2, a1, 4
10409 ; RV32ZVE32F-NEXT: bnez a2, .LBB89_12
10410 ; RV32ZVE32F-NEXT: .LBB89_3: # %else5
10411 ; RV32ZVE32F-NEXT: andi a2, a1, 8
10412 ; RV32ZVE32F-NEXT: bnez a2, .LBB89_13
10413 ; RV32ZVE32F-NEXT: .LBB89_4: # %else8
10414 ; RV32ZVE32F-NEXT: andi a2, a1, 16
10415 ; RV32ZVE32F-NEXT: bnez a2, .LBB89_14
10416 ; RV32ZVE32F-NEXT: .LBB89_5: # %else11
10417 ; RV32ZVE32F-NEXT: andi a2, a1, 32
10418 ; RV32ZVE32F-NEXT: bnez a2, .LBB89_15
10419 ; RV32ZVE32F-NEXT: .LBB89_6: # %else14
10420 ; RV32ZVE32F-NEXT: andi a2, a1, 64
10421 ; RV32ZVE32F-NEXT: bnez a2, .LBB89_16
10422 ; RV32ZVE32F-NEXT: .LBB89_7: # %else17
10423 ; RV32ZVE32F-NEXT: andi a1, a1, -128
10424 ; RV32ZVE32F-NEXT: beqz a1, .LBB89_9
10425 ; RV32ZVE32F-NEXT: .LBB89_8: # %cond.load19
10426 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10427 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
10428 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
10429 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
10430 ; RV32ZVE32F-NEXT: .LBB89_9: # %else20
10431 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
10432 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
10433 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
10434 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
10435 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
10436 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
10437 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
10438 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
10439 ; RV32ZVE32F-NEXT: ret
10440 ; RV32ZVE32F-NEXT: .LBB89_10: # %cond.load
10441 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10442 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
10443 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
10444 ; RV32ZVE32F-NEXT: andi a2, a1, 2
10445 ; RV32ZVE32F-NEXT: beqz a2, .LBB89_2
10446 ; RV32ZVE32F-NEXT: .LBB89_11: # %cond.load1
10447 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10448 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10449 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10450 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
10451 ; RV32ZVE32F-NEXT: andi a2, a1, 4
10452 ; RV32ZVE32F-NEXT: beqz a2, .LBB89_3
10453 ; RV32ZVE32F-NEXT: .LBB89_12: # %cond.load4
10454 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10455 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10456 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10457 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
10458 ; RV32ZVE32F-NEXT: andi a2, a1, 8
10459 ; RV32ZVE32F-NEXT: beqz a2, .LBB89_4
10460 ; RV32ZVE32F-NEXT: .LBB89_13: # %cond.load7
10461 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10462 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10463 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10464 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
10465 ; RV32ZVE32F-NEXT: andi a2, a1, 16
10466 ; RV32ZVE32F-NEXT: beqz a2, .LBB89_5
10467 ; RV32ZVE32F-NEXT: .LBB89_14: # %cond.load10
10468 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10469 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10470 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10471 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
10472 ; RV32ZVE32F-NEXT: andi a2, a1, 32
10473 ; RV32ZVE32F-NEXT: beqz a2, .LBB89_6
10474 ; RV32ZVE32F-NEXT: .LBB89_15: # %cond.load13
10475 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10476 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10477 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10478 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
10479 ; RV32ZVE32F-NEXT: andi a2, a1, 64
10480 ; RV32ZVE32F-NEXT: beqz a2, .LBB89_7
10481 ; RV32ZVE32F-NEXT: .LBB89_16: # %cond.load16
10482 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10483 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10484 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10485 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
10486 ; RV32ZVE32F-NEXT: andi a1, a1, -128
10487 ; RV32ZVE32F-NEXT: bnez a1, .LBB89_8
10488 ; RV32ZVE32F-NEXT: j .LBB89_9
10490 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
10491 ; RV64ZVE32F: # %bb.0:
10492 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10493 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
10494 ; RV64ZVE32F-NEXT: andi a3, a2, 1
10495 ; RV64ZVE32F-NEXT: beqz a3, .LBB89_2
10496 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
10497 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10498 ; RV64ZVE32F-NEXT: andi a3, a3, 255
10499 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10500 ; RV64ZVE32F-NEXT: add a3, a1, a3
10501 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
10502 ; RV64ZVE32F-NEXT: .LBB89_2: # %else
10503 ; RV64ZVE32F-NEXT: andi a3, a2, 2
10504 ; RV64ZVE32F-NEXT: beqz a3, .LBB89_4
10505 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
10506 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10507 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
10508 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
10509 ; RV64ZVE32F-NEXT: andi a3, a3, 255
10510 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10511 ; RV64ZVE32F-NEXT: add a3, a1, a3
10512 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
10513 ; RV64ZVE32F-NEXT: .LBB89_4: # %else2
10514 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
10515 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
10516 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
10517 ; RV64ZVE32F-NEXT: andi a3, a2, 4
10518 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10519 ; RV64ZVE32F-NEXT: bnez a3, .LBB89_14
10520 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
10521 ; RV64ZVE32F-NEXT: andi a3, a2, 8
10522 ; RV64ZVE32F-NEXT: bnez a3, .LBB89_15
10523 ; RV64ZVE32F-NEXT: .LBB89_6: # %else8
10524 ; RV64ZVE32F-NEXT: andi a3, a2, 16
10525 ; RV64ZVE32F-NEXT: bnez a3, .LBB89_16
10526 ; RV64ZVE32F-NEXT: .LBB89_7: # %else11
10527 ; RV64ZVE32F-NEXT: andi a3, a2, 32
10528 ; RV64ZVE32F-NEXT: beqz a3, .LBB89_9
10529 ; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13
10530 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
10531 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10532 ; RV64ZVE32F-NEXT: andi a3, a3, 255
10533 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10534 ; RV64ZVE32F-NEXT: add a3, a1, a3
10535 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
10536 ; RV64ZVE32F-NEXT: .LBB89_9: # %else14
10537 ; RV64ZVE32F-NEXT: andi a3, a2, 64
10538 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
10539 ; RV64ZVE32F-NEXT: beqz a3, .LBB89_11
10540 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
10541 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10542 ; RV64ZVE32F-NEXT: andi a3, a3, 255
10543 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10544 ; RV64ZVE32F-NEXT: add a3, a1, a3
10545 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
10546 ; RV64ZVE32F-NEXT: .LBB89_11: # %else17
10547 ; RV64ZVE32F-NEXT: andi a2, a2, -128
10548 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_13
10549 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
10550 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10551 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10552 ; RV64ZVE32F-NEXT: andi a2, a2, 255
10553 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10554 ; RV64ZVE32F-NEXT: add a1, a1, a2
10555 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
10556 ; RV64ZVE32F-NEXT: .LBB89_13: # %else20
10557 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
10558 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
10559 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
10560 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
10561 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
10562 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
10563 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
10564 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
10565 ; RV64ZVE32F-NEXT: ret
10566 ; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load4
10567 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10568 ; RV64ZVE32F-NEXT: andi a3, a3, 255
10569 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10570 ; RV64ZVE32F-NEXT: add a3, a1, a3
10571 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
10572 ; RV64ZVE32F-NEXT: andi a3, a2, 8
10573 ; RV64ZVE32F-NEXT: beqz a3, .LBB89_6
10574 ; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7
10575 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10576 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10577 ; RV64ZVE32F-NEXT: andi a3, a3, 255
10578 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10579 ; RV64ZVE32F-NEXT: add a3, a1, a3
10580 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
10581 ; RV64ZVE32F-NEXT: andi a3, a2, 16
10582 ; RV64ZVE32F-NEXT: beqz a3, .LBB89_7
10583 ; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10
10584 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
10585 ; RV64ZVE32F-NEXT: andi a3, a3, 255
10586 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10587 ; RV64ZVE32F-NEXT: add a3, a1, a3
10588 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
10589 ; RV64ZVE32F-NEXT: andi a3, a2, 32
10590 ; RV64ZVE32F-NEXT: bnez a3, .LBB89_8
10591 ; RV64ZVE32F-NEXT: j .LBB89_9
10592 %eidxs = zext <8 x i8> %idxs to <8 x i64>
10593 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
10594 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
10595 ret <8 x double> %v
10596 }
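; The i16-index tests below follow the same pattern: the implicitly sign-extending GEP lowers
; to vsext.vf2/vluxei32 on RV32 and vsext.vf4/vluxei64 on RV64V, with RV64ZVE32F again
; scalarizing via vslidedown of the index vector.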
10598 define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
10599 ; RV32V-LABEL: mgather_baseidx_v8i16_v8f64:
10601 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10602 ; RV32V-NEXT: vsext.vf2 v10, v8
10603 ; RV32V-NEXT: vsll.vi v8, v10, 3
10604 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
10605 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
10606 ; RV32V-NEXT: vmv.v.v v8, v12
10609 ; RV64V-LABEL: mgather_baseidx_v8i16_v8f64:
10611 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
10612 ; RV64V-NEXT: vsext.vf4 v16, v8
10613 ; RV64V-NEXT: vsll.vi v8, v16, 3
10614 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
10615 ; RV64V-NEXT: vmv.v.v v8, v12
10618 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
10619 ; RV32ZVE32F: # %bb.0:
10620 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10621 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
10622 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
10623 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
10624 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10625 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
10626 ; RV32ZVE32F-NEXT: andi a2, a1, 1
10627 ; RV32ZVE32F-NEXT: bnez a2, .LBB90_10
10628 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10629 ; RV32ZVE32F-NEXT: andi a2, a1, 2
10630 ; RV32ZVE32F-NEXT: bnez a2, .LBB90_11
10631 ; RV32ZVE32F-NEXT: .LBB90_2: # %else2
10632 ; RV32ZVE32F-NEXT: andi a2, a1, 4
10633 ; RV32ZVE32F-NEXT: bnez a2, .LBB90_12
10634 ; RV32ZVE32F-NEXT: .LBB90_3: # %else5
10635 ; RV32ZVE32F-NEXT: andi a2, a1, 8
10636 ; RV32ZVE32F-NEXT: bnez a2, .LBB90_13
10637 ; RV32ZVE32F-NEXT: .LBB90_4: # %else8
10638 ; RV32ZVE32F-NEXT: andi a2, a1, 16
10639 ; RV32ZVE32F-NEXT: bnez a2, .LBB90_14
10640 ; RV32ZVE32F-NEXT: .LBB90_5: # %else11
10641 ; RV32ZVE32F-NEXT: andi a2, a1, 32
10642 ; RV32ZVE32F-NEXT: bnez a2, .LBB90_15
10643 ; RV32ZVE32F-NEXT: .LBB90_6: # %else14
10644 ; RV32ZVE32F-NEXT: andi a2, a1, 64
10645 ; RV32ZVE32F-NEXT: bnez a2, .LBB90_16
10646 ; RV32ZVE32F-NEXT: .LBB90_7: # %else17
10647 ; RV32ZVE32F-NEXT: andi a1, a1, -128
10648 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_9
10649 ; RV32ZVE32F-NEXT: .LBB90_8: # %cond.load19
10650 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10651 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
10652 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
10653 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
10654 ; RV32ZVE32F-NEXT: .LBB90_9: # %else20
10655 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
10656 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
10657 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
10658 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
10659 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
10660 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
10661 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
10662 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
10663 ; RV32ZVE32F-NEXT: ret
10664 ; RV32ZVE32F-NEXT: .LBB90_10: # %cond.load
10665 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10666 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
10667 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
10668 ; RV32ZVE32F-NEXT: andi a2, a1, 2
10669 ; RV32ZVE32F-NEXT: beqz a2, .LBB90_2
10670 ; RV32ZVE32F-NEXT: .LBB90_11: # %cond.load1
10671 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10672 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10673 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10674 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
10675 ; RV32ZVE32F-NEXT: andi a2, a1, 4
10676 ; RV32ZVE32F-NEXT: beqz a2, .LBB90_3
10677 ; RV32ZVE32F-NEXT: .LBB90_12: # %cond.load4
10678 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10679 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10680 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10681 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
10682 ; RV32ZVE32F-NEXT: andi a2, a1, 8
10683 ; RV32ZVE32F-NEXT: beqz a2, .LBB90_4
10684 ; RV32ZVE32F-NEXT: .LBB90_13: # %cond.load7
10685 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10686 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10687 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10688 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
10689 ; RV32ZVE32F-NEXT: andi a2, a1, 16
10690 ; RV32ZVE32F-NEXT: beqz a2, .LBB90_5
10691 ; RV32ZVE32F-NEXT: .LBB90_14: # %cond.load10
10692 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10693 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10694 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10695 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
10696 ; RV32ZVE32F-NEXT: andi a2, a1, 32
10697 ; RV32ZVE32F-NEXT: beqz a2, .LBB90_6
10698 ; RV32ZVE32F-NEXT: .LBB90_15: # %cond.load13
10699 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10700 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10701 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10702 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
10703 ; RV32ZVE32F-NEXT: andi a2, a1, 64
10704 ; RV32ZVE32F-NEXT: beqz a2, .LBB90_7
10705 ; RV32ZVE32F-NEXT: .LBB90_16: # %cond.load16
10706 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10707 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10708 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10709 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
10710 ; RV32ZVE32F-NEXT: andi a1, a1, -128
10711 ; RV32ZVE32F-NEXT: bnez a1, .LBB90_8
10712 ; RV32ZVE32F-NEXT: j .LBB90_9
10714 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
10715 ; RV64ZVE32F: # %bb.0:
10716 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10717 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
10718 ; RV64ZVE32F-NEXT: andi a3, a2, 1
10719 ; RV64ZVE32F-NEXT: beqz a3, .LBB90_2
10720 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
10721 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
10722 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10723 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10724 ; RV64ZVE32F-NEXT: add a3, a1, a3
10725 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
10726 ; RV64ZVE32F-NEXT: .LBB90_2: # %else
10727 ; RV64ZVE32F-NEXT: andi a3, a2, 2
10728 ; RV64ZVE32F-NEXT: beqz a3, .LBB90_4
10729 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
10730 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10731 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
10732 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
10733 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10734 ; RV64ZVE32F-NEXT: add a3, a1, a3
10735 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
10736 ; RV64ZVE32F-NEXT: .LBB90_4: # %else2
10737 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
10738 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
10739 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
10740 ; RV64ZVE32F-NEXT: andi a3, a2, 4
10741 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10742 ; RV64ZVE32F-NEXT: bnez a3, .LBB90_14
10743 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
10744 ; RV64ZVE32F-NEXT: andi a3, a2, 8
10745 ; RV64ZVE32F-NEXT: bnez a3, .LBB90_15
10746 ; RV64ZVE32F-NEXT: .LBB90_6: # %else8
10747 ; RV64ZVE32F-NEXT: andi a3, a2, 16
10748 ; RV64ZVE32F-NEXT: bnez a3, .LBB90_16
10749 ; RV64ZVE32F-NEXT: .LBB90_7: # %else11
10750 ; RV64ZVE32F-NEXT: andi a3, a2, 32
10751 ; RV64ZVE32F-NEXT: beqz a3, .LBB90_9
10752 ; RV64ZVE32F-NEXT: .LBB90_8: # %cond.load13
10753 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
10754 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10755 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10756 ; RV64ZVE32F-NEXT: add a3, a1, a3
10757 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
10758 ; RV64ZVE32F-NEXT: .LBB90_9: # %else14
10759 ; RV64ZVE32F-NEXT: andi a3, a2, 64
10760 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
10761 ; RV64ZVE32F-NEXT: beqz a3, .LBB90_11
10762 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
10763 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10764 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10765 ; RV64ZVE32F-NEXT: add a3, a1, a3
10766 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
10767 ; RV64ZVE32F-NEXT: .LBB90_11: # %else17
10768 ; RV64ZVE32F-NEXT: andi a2, a2, -128
10769 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_13
10770 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
10771 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10772 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10773 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10774 ; RV64ZVE32F-NEXT: add a1, a1, a2
10775 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
10776 ; RV64ZVE32F-NEXT: .LBB90_13: # %else20
10777 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
10778 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
10779 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
10780 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
10781 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
10782 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
10783 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
10784 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
10785 ; RV64ZVE32F-NEXT: ret
10786 ; RV64ZVE32F-NEXT: .LBB90_14: # %cond.load4
10787 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10788 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10789 ; RV64ZVE32F-NEXT: add a3, a1, a3
10790 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
10791 ; RV64ZVE32F-NEXT: andi a3, a2, 8
10792 ; RV64ZVE32F-NEXT: beqz a3, .LBB90_6
10793 ; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7
10794 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10795 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10796 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10797 ; RV64ZVE32F-NEXT: add a3, a1, a3
10798 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
10799 ; RV64ZVE32F-NEXT: andi a3, a2, 16
10800 ; RV64ZVE32F-NEXT: beqz a3, .LBB90_7
10801 ; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10
10802 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
10803 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10804 ; RV64ZVE32F-NEXT: add a3, a1, a3
10805 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
10806 ; RV64ZVE32F-NEXT: andi a3, a2, 32
10807 ; RV64ZVE32F-NEXT: bnez a3, .LBB90_8
10808 ; RV64ZVE32F-NEXT: j .LBB90_9
10809 %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
10810 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
10811 ret <8 x double> %v
10812 }
10814 define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
10815 ; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8f64:
10817 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10818 ; RV32V-NEXT: vsext.vf2 v10, v8
10819 ; RV32V-NEXT: vsll.vi v8, v10, 3
10820 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
10821 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
10822 ; RV32V-NEXT: vmv.v.v v8, v12
10825 ; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f64:
10827 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
10828 ; RV64V-NEXT: vsext.vf4 v16, v8
10829 ; RV64V-NEXT: vsll.vi v8, v16, 3
10830 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
10831 ; RV64V-NEXT: vmv.v.v v8, v12
10834 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
10835 ; RV32ZVE32F: # %bb.0:
10836 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
10837 ; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
10838 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
10839 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
10840 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10841 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
10842 ; RV32ZVE32F-NEXT: andi a2, a1, 1
10843 ; RV32ZVE32F-NEXT: bnez a2, .LBB91_10
10844 ; RV32ZVE32F-NEXT: # %bb.1: # %else
10845 ; RV32ZVE32F-NEXT: andi a2, a1, 2
10846 ; RV32ZVE32F-NEXT: bnez a2, .LBB91_11
10847 ; RV32ZVE32F-NEXT: .LBB91_2: # %else2
10848 ; RV32ZVE32F-NEXT: andi a2, a1, 4
10849 ; RV32ZVE32F-NEXT: bnez a2, .LBB91_12
10850 ; RV32ZVE32F-NEXT: .LBB91_3: # %else5
10851 ; RV32ZVE32F-NEXT: andi a2, a1, 8
10852 ; RV32ZVE32F-NEXT: bnez a2, .LBB91_13
10853 ; RV32ZVE32F-NEXT: .LBB91_4: # %else8
10854 ; RV32ZVE32F-NEXT: andi a2, a1, 16
10855 ; RV32ZVE32F-NEXT: bnez a2, .LBB91_14
10856 ; RV32ZVE32F-NEXT: .LBB91_5: # %else11
10857 ; RV32ZVE32F-NEXT: andi a2, a1, 32
10858 ; RV32ZVE32F-NEXT: bnez a2, .LBB91_15
10859 ; RV32ZVE32F-NEXT: .LBB91_6: # %else14
10860 ; RV32ZVE32F-NEXT: andi a2, a1, 64
10861 ; RV32ZVE32F-NEXT: bnez a2, .LBB91_16
10862 ; RV32ZVE32F-NEXT: .LBB91_7: # %else17
10863 ; RV32ZVE32F-NEXT: andi a1, a1, -128
10864 ; RV32ZVE32F-NEXT: beqz a1, .LBB91_9
10865 ; RV32ZVE32F-NEXT: .LBB91_8: # %cond.load19
10866 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10867 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
10868 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
10869 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
10870 ; RV32ZVE32F-NEXT: .LBB91_9: # %else20
10871 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
10872 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
10873 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
10874 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
10875 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
10876 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
10877 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
10878 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
10879 ; RV32ZVE32F-NEXT: ret
10880 ; RV32ZVE32F-NEXT: .LBB91_10: # %cond.load
10881 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
10882 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
10883 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
10884 ; RV32ZVE32F-NEXT: andi a2, a1, 2
10885 ; RV32ZVE32F-NEXT: beqz a2, .LBB91_2
10886 ; RV32ZVE32F-NEXT: .LBB91_11: # %cond.load1
10887 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10888 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
10889 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10890 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
10891 ; RV32ZVE32F-NEXT: andi a2, a1, 4
10892 ; RV32ZVE32F-NEXT: beqz a2, .LBB91_3
10893 ; RV32ZVE32F-NEXT: .LBB91_12: # %cond.load4
10894 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10895 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
10896 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10897 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
10898 ; RV32ZVE32F-NEXT: andi a2, a1, 8
10899 ; RV32ZVE32F-NEXT: beqz a2, .LBB91_4
10900 ; RV32ZVE32F-NEXT: .LBB91_13: # %cond.load7
10901 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
10902 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
10903 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10904 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
10905 ; RV32ZVE32F-NEXT: andi a2, a1, 16
10906 ; RV32ZVE32F-NEXT: beqz a2, .LBB91_5
10907 ; RV32ZVE32F-NEXT: .LBB91_14: # %cond.load10
10908 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10909 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
10910 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10911 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
10912 ; RV32ZVE32F-NEXT: andi a2, a1, 32
10913 ; RV32ZVE32F-NEXT: beqz a2, .LBB91_6
10914 ; RV32ZVE32F-NEXT: .LBB91_15: # %cond.load13
10915 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10916 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
10917 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10918 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
10919 ; RV32ZVE32F-NEXT: andi a2, a1, 64
10920 ; RV32ZVE32F-NEXT: beqz a2, .LBB91_7
10921 ; RV32ZVE32F-NEXT: .LBB91_16: # %cond.load16
10922 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
10923 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
10924 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
10925 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
10926 ; RV32ZVE32F-NEXT: andi a1, a1, -128
10927 ; RV32ZVE32F-NEXT: bnez a1, .LBB91_8
10928 ; RV32ZVE32F-NEXT: j .LBB91_9
10930 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
10931 ; RV64ZVE32F: # %bb.0:
10932 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
10933 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
10934 ; RV64ZVE32F-NEXT: andi a3, a2, 1
10935 ; RV64ZVE32F-NEXT: beqz a3, .LBB91_2
10936 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
10937 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
10938 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10939 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10940 ; RV64ZVE32F-NEXT: add a3, a1, a3
10941 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
10942 ; RV64ZVE32F-NEXT: .LBB91_2: # %else
10943 ; RV64ZVE32F-NEXT: andi a3, a2, 2
10944 ; RV64ZVE32F-NEXT: beqz a3, .LBB91_4
10945 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
10946 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
10947 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
10948 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
10949 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10950 ; RV64ZVE32F-NEXT: add a3, a1, a3
10951 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
10952 ; RV64ZVE32F-NEXT: .LBB91_4: # %else2
10953 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
10954 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
10955 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
10956 ; RV64ZVE32F-NEXT: andi a3, a2, 4
10957 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
10958 ; RV64ZVE32F-NEXT: bnez a3, .LBB91_14
10959 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
10960 ; RV64ZVE32F-NEXT: andi a3, a2, 8
10961 ; RV64ZVE32F-NEXT: bnez a3, .LBB91_15
10962 ; RV64ZVE32F-NEXT: .LBB91_6: # %else8
10963 ; RV64ZVE32F-NEXT: andi a3, a2, 16
10964 ; RV64ZVE32F-NEXT: bnez a3, .LBB91_16
10965 ; RV64ZVE32F-NEXT: .LBB91_7: # %else11
10966 ; RV64ZVE32F-NEXT: andi a3, a2, 32
10967 ; RV64ZVE32F-NEXT: beqz a3, .LBB91_9
10968 ; RV64ZVE32F-NEXT: .LBB91_8: # %cond.load13
10969 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
10970 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10971 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10972 ; RV64ZVE32F-NEXT: add a3, a1, a3
10973 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
10974 ; RV64ZVE32F-NEXT: .LBB91_9: # %else14
10975 ; RV64ZVE32F-NEXT: andi a3, a2, 64
10976 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
10977 ; RV64ZVE32F-NEXT: beqz a3, .LBB91_11
10978 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
10979 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
10980 ; RV64ZVE32F-NEXT: slli a3, a3, 3
10981 ; RV64ZVE32F-NEXT: add a3, a1, a3
10982 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
10983 ; RV64ZVE32F-NEXT: .LBB91_11: # %else17
10984 ; RV64ZVE32F-NEXT: andi a2, a2, -128
10985 ; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
10986 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
10987 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
10988 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
10989 ; RV64ZVE32F-NEXT: slli a2, a2, 3
10990 ; RV64ZVE32F-NEXT: add a1, a1, a2
10991 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
10992 ; RV64ZVE32F-NEXT: .LBB91_13: # %else20
10993 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
10994 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
10995 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
10996 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
10997 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
10998 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
10999 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
11000 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
11001 ; RV64ZVE32F-NEXT: ret
11002 ; RV64ZVE32F-NEXT: .LBB91_14: # %cond.load4
11003 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11004 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11005 ; RV64ZVE32F-NEXT: add a3, a1, a3
11006 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
11007 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11008 ; RV64ZVE32F-NEXT: beqz a3, .LBB91_6
11009 ; RV64ZVE32F-NEXT: .LBB91_15: # %cond.load7
11010 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11011 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11012 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11013 ; RV64ZVE32F-NEXT: add a3, a1, a3
11014 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
11015 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11016 ; RV64ZVE32F-NEXT: beqz a3, .LBB91_7
11017 ; RV64ZVE32F-NEXT: .LBB91_16: # %cond.load10
11018 ; RV64ZVE32F-NEXT: vmv.x.s a3, v9
11019 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11020 ; RV64ZVE32F-NEXT: add a3, a1, a3
11021 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
11022 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11023 ; RV64ZVE32F-NEXT: bnez a3, .LBB91_8
11024 ; RV64ZVE32F-NEXT: j .LBB91_9
11025 %eidxs = sext <8 x i16> %idxs to <8 x i64>
11026 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
11027 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11028 ret <8 x double> %v
11029 }
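; Zero-extended i16 indices fit in 32 bits, so below both RV32V and RV64V use vzext.vf2 with
; vluxei32, while RV64ZVE32F materializes the 0xffff mask with lui/addiw and ANDs each
; extracted index.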
11031 define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11032 ; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8f64:
11034 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11035 ; RV32V-NEXT: vzext.vf2 v10, v8
11036 ; RV32V-NEXT: vsll.vi v8, v10, 3
11037 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
11038 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
11039 ; RV32V-NEXT: vmv.v.v v8, v12
11042 ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f64:
11044 ; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11045 ; RV64V-NEXT: vzext.vf2 v10, v8
11046 ; RV64V-NEXT: vsll.vi v8, v10, 3
11047 ; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
11048 ; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t
11049 ; RV64V-NEXT: vmv.v.v v8, v12
11052 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
11053 ; RV32ZVE32F: # %bb.0:
11054 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11055 ; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
11056 ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
11057 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
11058 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11059 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
11060 ; RV32ZVE32F-NEXT: andi a2, a1, 1
11061 ; RV32ZVE32F-NEXT: bnez a2, .LBB92_10
11062 ; RV32ZVE32F-NEXT: # %bb.1: # %else
11063 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11064 ; RV32ZVE32F-NEXT: bnez a2, .LBB92_11
11065 ; RV32ZVE32F-NEXT: .LBB92_2: # %else2
11066 ; RV32ZVE32F-NEXT: andi a2, a1, 4
11067 ; RV32ZVE32F-NEXT: bnez a2, .LBB92_12
11068 ; RV32ZVE32F-NEXT: .LBB92_3: # %else5
11069 ; RV32ZVE32F-NEXT: andi a2, a1, 8
11070 ; RV32ZVE32F-NEXT: bnez a2, .LBB92_13
11071 ; RV32ZVE32F-NEXT: .LBB92_4: # %else8
11072 ; RV32ZVE32F-NEXT: andi a2, a1, 16
11073 ; RV32ZVE32F-NEXT: bnez a2, .LBB92_14
11074 ; RV32ZVE32F-NEXT: .LBB92_5: # %else11
11075 ; RV32ZVE32F-NEXT: andi a2, a1, 32
11076 ; RV32ZVE32F-NEXT: bnez a2, .LBB92_15
11077 ; RV32ZVE32F-NEXT: .LBB92_6: # %else14
11078 ; RV32ZVE32F-NEXT: andi a2, a1, 64
11079 ; RV32ZVE32F-NEXT: bnez a2, .LBB92_16
11080 ; RV32ZVE32F-NEXT: .LBB92_7: # %else17
11081 ; RV32ZVE32F-NEXT: andi a1, a1, -128
11082 ; RV32ZVE32F-NEXT: beqz a1, .LBB92_9
11083 ; RV32ZVE32F-NEXT: .LBB92_8: # %cond.load19
11084 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11085 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11086 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11087 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
11088 ; RV32ZVE32F-NEXT: .LBB92_9: # %else20
11089 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
11090 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
11091 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
11092 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
11093 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
11094 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
11095 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
11096 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
11097 ; RV32ZVE32F-NEXT: ret
11098 ; RV32ZVE32F-NEXT: .LBB92_10: # %cond.load
11099 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
11100 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
11101 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
11102 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11103 ; RV32ZVE32F-NEXT: beqz a2, .LBB92_2
11104 ; RV32ZVE32F-NEXT: .LBB92_11: # %cond.load1
11105 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11106 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
11107 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11108 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
11109 ; RV32ZVE32F-NEXT: andi a2, a1, 4
11110 ; RV32ZVE32F-NEXT: beqz a2, .LBB92_3
11111 ; RV32ZVE32F-NEXT: .LBB92_12: # %cond.load4
11112 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11113 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
11114 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11115 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
11116 ; RV32ZVE32F-NEXT: andi a2, a1, 8
11117 ; RV32ZVE32F-NEXT: beqz a2, .LBB92_4
11118 ; RV32ZVE32F-NEXT: .LBB92_13: # %cond.load7
11119 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11120 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
11121 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11122 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
11123 ; RV32ZVE32F-NEXT: andi a2, a1, 16
11124 ; RV32ZVE32F-NEXT: beqz a2, .LBB92_5
11125 ; RV32ZVE32F-NEXT: .LBB92_14: # %cond.load10
11126 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11127 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
11128 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11129 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
11130 ; RV32ZVE32F-NEXT: andi a2, a1, 32
11131 ; RV32ZVE32F-NEXT: beqz a2, .LBB92_6
11132 ; RV32ZVE32F-NEXT: .LBB92_15: # %cond.load13
11133 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11134 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
11135 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11136 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
11137 ; RV32ZVE32F-NEXT: andi a2, a1, 64
11138 ; RV32ZVE32F-NEXT: beqz a2, .LBB92_7
11139 ; RV32ZVE32F-NEXT: .LBB92_16: # %cond.load16
11140 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11141 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
11142 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11143 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
11144 ; RV32ZVE32F-NEXT: andi a1, a1, -128
11145 ; RV32ZVE32F-NEXT: bnez a1, .LBB92_8
11146 ; RV32ZVE32F-NEXT: j .LBB92_9
11147 ;
11148 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
11149 ; RV64ZVE32F: # %bb.0:
11150 ; RV64ZVE32F-NEXT: lui a2, 16
11151 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11152 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
11153 ; RV64ZVE32F-NEXT: andi a4, a3, 1
11154 ; RV64ZVE32F-NEXT: addiw a2, a2, -1
11155 ; RV64ZVE32F-NEXT: beqz a4, .LBB92_2
11156 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
11157 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
11158 ; RV64ZVE32F-NEXT: vmv.x.s a4, v8
11159 ; RV64ZVE32F-NEXT: and a4, a4, a2
11160 ; RV64ZVE32F-NEXT: slli a4, a4, 3
11161 ; RV64ZVE32F-NEXT: add a4, a1, a4
11162 ; RV64ZVE32F-NEXT: fld fa0, 0(a4)
11163 ; RV64ZVE32F-NEXT: .LBB92_2: # %else
11164 ; RV64ZVE32F-NEXT: andi a4, a3, 2
11165 ; RV64ZVE32F-NEXT: beqz a4, .LBB92_4
11166 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
11167 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
11168 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
11169 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
11170 ; RV64ZVE32F-NEXT: and a4, a4, a2
11171 ; RV64ZVE32F-NEXT: slli a4, a4, 3
11172 ; RV64ZVE32F-NEXT: add a4, a1, a4
11173 ; RV64ZVE32F-NEXT: fld fa1, 0(a4)
11174 ; RV64ZVE32F-NEXT: .LBB92_4: # %else2
11175 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
11176 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
11177 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
11178 ; RV64ZVE32F-NEXT: andi a4, a3, 4
11179 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
11180 ; RV64ZVE32F-NEXT: bnez a4, .LBB92_14
11181 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
11182 ; RV64ZVE32F-NEXT: andi a4, a3, 8
11183 ; RV64ZVE32F-NEXT: bnez a4, .LBB92_15
11184 ; RV64ZVE32F-NEXT: .LBB92_6: # %else8
11185 ; RV64ZVE32F-NEXT: andi a4, a3, 16
11186 ; RV64ZVE32F-NEXT: bnez a4, .LBB92_16
11187 ; RV64ZVE32F-NEXT: .LBB92_7: # %else11
11188 ; RV64ZVE32F-NEXT: andi a4, a3, 32
11189 ; RV64ZVE32F-NEXT: beqz a4, .LBB92_9
11190 ; RV64ZVE32F-NEXT: .LBB92_8: # %cond.load13
11191 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
11192 ; RV64ZVE32F-NEXT: vmv.x.s a4, v8
11193 ; RV64ZVE32F-NEXT: and a4, a4, a2
11194 ; RV64ZVE32F-NEXT: slli a4, a4, 3
11195 ; RV64ZVE32F-NEXT: add a4, a1, a4
11196 ; RV64ZVE32F-NEXT: fld fa5, 0(a4)
11197 ; RV64ZVE32F-NEXT: .LBB92_9: # %else14
11198 ; RV64ZVE32F-NEXT: andi a4, a3, 64
11199 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
11200 ; RV64ZVE32F-NEXT: beqz a4, .LBB92_11
11201 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
11202 ; RV64ZVE32F-NEXT: vmv.x.s a4, v8
11203 ; RV64ZVE32F-NEXT: and a4, a4, a2
11204 ; RV64ZVE32F-NEXT: slli a4, a4, 3
11205 ; RV64ZVE32F-NEXT: add a4, a1, a4
11206 ; RV64ZVE32F-NEXT: fld fa6, 0(a4)
11207 ; RV64ZVE32F-NEXT: .LBB92_11: # %else17
11208 ; RV64ZVE32F-NEXT: andi a3, a3, -128
11209 ; RV64ZVE32F-NEXT: beqz a3, .LBB92_13
11210 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
11211 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11212 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11213 ; RV64ZVE32F-NEXT: and a2, a3, a2
11214 ; RV64ZVE32F-NEXT: slli a2, a2, 3
11215 ; RV64ZVE32F-NEXT: add a1, a1, a2
11216 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
11217 ; RV64ZVE32F-NEXT: .LBB92_13: # %else20
11218 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
11219 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
11220 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
11221 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
11222 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
11223 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
11224 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
11225 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
11226 ; RV64ZVE32F-NEXT: ret
11227 ; RV64ZVE32F-NEXT: .LBB92_14: # %cond.load4
11228 ; RV64ZVE32F-NEXT: vmv.x.s a4, v8
11229 ; RV64ZVE32F-NEXT: and a4, a4, a2
11230 ; RV64ZVE32F-NEXT: slli a4, a4, 3
11231 ; RV64ZVE32F-NEXT: add a4, a1, a4
11232 ; RV64ZVE32F-NEXT: fld fa2, 0(a4)
11233 ; RV64ZVE32F-NEXT: andi a4, a3, 8
11234 ; RV64ZVE32F-NEXT: beqz a4, .LBB92_6
11235 ; RV64ZVE32F-NEXT: .LBB92_15: # %cond.load7
11236 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11237 ; RV64ZVE32F-NEXT: vmv.x.s a4, v8
11238 ; RV64ZVE32F-NEXT: and a4, a4, a2
11239 ; RV64ZVE32F-NEXT: slli a4, a4, 3
11240 ; RV64ZVE32F-NEXT: add a4, a1, a4
11241 ; RV64ZVE32F-NEXT: fld fa3, 0(a4)
11242 ; RV64ZVE32F-NEXT: andi a4, a3, 16
11243 ; RV64ZVE32F-NEXT: beqz a4, .LBB92_7
11244 ; RV64ZVE32F-NEXT: .LBB92_16: # %cond.load10
11245 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9
11246 ; RV64ZVE32F-NEXT: and a4, a4, a2
11247 ; RV64ZVE32F-NEXT: slli a4, a4, 3
11248 ; RV64ZVE32F-NEXT: add a4, a1, a4
11249 ; RV64ZVE32F-NEXT: fld fa4, 0(a4)
11250 ; RV64ZVE32F-NEXT: andi a4, a3, 32
11251 ; RV64ZVE32F-NEXT: bnez a4, .LBB92_8
11252 ; RV64ZVE32F-NEXT: j .LBB92_9
11253 %eidxs = zext <8 x i16> %idxs to <8 x i64>
11254 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
11255 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11256 ret <8 x double> %v
11257 }
11259 define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11260 ; RV32V-LABEL: mgather_baseidx_v8i32_v8f64:
11261 ; RV32V: # %bb.0:
11262 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11263 ; RV32V-NEXT: vsll.vi v8, v8, 3
11264 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
11265 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
11266 ; RV32V-NEXT: vmv.v.v v8, v12
11267 ; RV32V-NEXT: ret
11268 ;
11269 ; RV64V-LABEL: mgather_baseidx_v8i32_v8f64:
11270 ; RV64V: # %bb.0:
11271 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
11272 ; RV64V-NEXT: vsext.vf2 v16, v8
11273 ; RV64V-NEXT: vsll.vi v8, v16, 3
11274 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
11275 ; RV64V-NEXT: vmv.v.v v8, v12
11276 ; RV64V-NEXT: ret
11277 ;
11278 ; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
11279 ; RV32ZVE32F: # %bb.0:
11280 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11281 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
11282 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
11283 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11284 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
11285 ; RV32ZVE32F-NEXT: andi a2, a1, 1
11286 ; RV32ZVE32F-NEXT: bnez a2, .LBB93_10
11287 ; RV32ZVE32F-NEXT: # %bb.1: # %else
11288 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11289 ; RV32ZVE32F-NEXT: bnez a2, .LBB93_11
11290 ; RV32ZVE32F-NEXT: .LBB93_2: # %else2
11291 ; RV32ZVE32F-NEXT: andi a2, a1, 4
11292 ; RV32ZVE32F-NEXT: bnez a2, .LBB93_12
11293 ; RV32ZVE32F-NEXT: .LBB93_3: # %else5
11294 ; RV32ZVE32F-NEXT: andi a2, a1, 8
11295 ; RV32ZVE32F-NEXT: bnez a2, .LBB93_13
11296 ; RV32ZVE32F-NEXT: .LBB93_4: # %else8
11297 ; RV32ZVE32F-NEXT: andi a2, a1, 16
11298 ; RV32ZVE32F-NEXT: bnez a2, .LBB93_14
11299 ; RV32ZVE32F-NEXT: .LBB93_5: # %else11
11300 ; RV32ZVE32F-NEXT: andi a2, a1, 32
11301 ; RV32ZVE32F-NEXT: bnez a2, .LBB93_15
11302 ; RV32ZVE32F-NEXT: .LBB93_6: # %else14
11303 ; RV32ZVE32F-NEXT: andi a2, a1, 64
11304 ; RV32ZVE32F-NEXT: bnez a2, .LBB93_16
11305 ; RV32ZVE32F-NEXT: .LBB93_7: # %else17
11306 ; RV32ZVE32F-NEXT: andi a1, a1, -128
11307 ; RV32ZVE32F-NEXT: beqz a1, .LBB93_9
11308 ; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load19
11309 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11310 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11311 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11312 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
11313 ; RV32ZVE32F-NEXT: .LBB93_9: # %else20
11314 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
11315 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
11316 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
11317 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
11318 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
11319 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
11320 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
11321 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
11322 ; RV32ZVE32F-NEXT: ret
11323 ; RV32ZVE32F-NEXT: .LBB93_10: # %cond.load
11324 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
11325 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
11326 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
11327 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11328 ; RV32ZVE32F-NEXT: beqz a2, .LBB93_2
11329 ; RV32ZVE32F-NEXT: .LBB93_11: # %cond.load1
11330 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11331 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
11332 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11333 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
11334 ; RV32ZVE32F-NEXT: andi a2, a1, 4
11335 ; RV32ZVE32F-NEXT: beqz a2, .LBB93_3
11336 ; RV32ZVE32F-NEXT: .LBB93_12: # %cond.load4
11337 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11338 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
11339 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11340 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
11341 ; RV32ZVE32F-NEXT: andi a2, a1, 8
11342 ; RV32ZVE32F-NEXT: beqz a2, .LBB93_4
11343 ; RV32ZVE32F-NEXT: .LBB93_13: # %cond.load7
11344 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11345 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
11346 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11347 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
11348 ; RV32ZVE32F-NEXT: andi a2, a1, 16
11349 ; RV32ZVE32F-NEXT: beqz a2, .LBB93_5
11350 ; RV32ZVE32F-NEXT: .LBB93_14: # %cond.load10
11351 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11352 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
11353 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11354 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
11355 ; RV32ZVE32F-NEXT: andi a2, a1, 32
11356 ; RV32ZVE32F-NEXT: beqz a2, .LBB93_6
11357 ; RV32ZVE32F-NEXT: .LBB93_15: # %cond.load13
11358 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11359 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
11360 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11361 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
11362 ; RV32ZVE32F-NEXT: andi a2, a1, 64
11363 ; RV32ZVE32F-NEXT: beqz a2, .LBB93_7
11364 ; RV32ZVE32F-NEXT: .LBB93_16: # %cond.load16
11365 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11366 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
11367 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11368 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
11369 ; RV32ZVE32F-NEXT: andi a1, a1, -128
11370 ; RV32ZVE32F-NEXT: bnez a1, .LBB93_8
11371 ; RV32ZVE32F-NEXT: j .LBB93_9
11372 ;
11373 ; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
11374 ; RV64ZVE32F: # %bb.0:
11375 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11376 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
11377 ; RV64ZVE32F-NEXT: andi a3, a2, 1
11378 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_2
11379 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
11380 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
11381 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11382 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11383 ; RV64ZVE32F-NEXT: add a3, a1, a3
11384 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
11385 ; RV64ZVE32F-NEXT: .LBB93_2: # %else
11386 ; RV64ZVE32F-NEXT: andi a3, a2, 2
11387 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_4
11388 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
11389 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11390 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
11391 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
11392 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11393 ; RV64ZVE32F-NEXT: add a3, a1, a3
11394 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
11395 ; RV64ZVE32F-NEXT: .LBB93_4: # %else2
11396 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
11397 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
11398 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
11399 ; RV64ZVE32F-NEXT: andi a3, a2, 4
11400 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
11401 ; RV64ZVE32F-NEXT: bnez a3, .LBB93_14
11402 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
11403 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11404 ; RV64ZVE32F-NEXT: bnez a3, .LBB93_15
11405 ; RV64ZVE32F-NEXT: .LBB93_6: # %else8
11406 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11407 ; RV64ZVE32F-NEXT: bnez a3, .LBB93_16
11408 ; RV64ZVE32F-NEXT: .LBB93_7: # %else11
11409 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11410 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_9
11411 ; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load13
11412 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
11413 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11414 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11415 ; RV64ZVE32F-NEXT: add a3, a1, a3
11416 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
11417 ; RV64ZVE32F-NEXT: .LBB93_9: # %else14
11418 ; RV64ZVE32F-NEXT: andi a3, a2, 64
11419 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
11420 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_11
11421 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
11422 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11423 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11424 ; RV64ZVE32F-NEXT: add a3, a1, a3
11425 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
11426 ; RV64ZVE32F-NEXT: .LBB93_11: # %else17
11427 ; RV64ZVE32F-NEXT: andi a2, a2, -128
11428 ; RV64ZVE32F-NEXT: beqz a2, .LBB93_13
11429 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
11430 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11431 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
11432 ; RV64ZVE32F-NEXT: slli a2, a2, 3
11433 ; RV64ZVE32F-NEXT: add a1, a1, a2
11434 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
11435 ; RV64ZVE32F-NEXT: .LBB93_13: # %else20
11436 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
11437 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
11438 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
11439 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
11440 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
11441 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
11442 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
11443 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
11444 ; RV64ZVE32F-NEXT: ret
11445 ; RV64ZVE32F-NEXT: .LBB93_14: # %cond.load4
11446 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11447 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11448 ; RV64ZVE32F-NEXT: add a3, a1, a3
11449 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
11450 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11451 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_6
11452 ; RV64ZVE32F-NEXT: .LBB93_15: # %cond.load7
11453 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11454 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11455 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11456 ; RV64ZVE32F-NEXT: add a3, a1, a3
11457 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
11458 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11459 ; RV64ZVE32F-NEXT: beqz a3, .LBB93_7
11460 ; RV64ZVE32F-NEXT: .LBB93_16: # %cond.load10
11461 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
11462 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11463 ; RV64ZVE32F-NEXT: add a3, a1, a3
11464 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
11465 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11466 ; RV64ZVE32F-NEXT: bnez a3, .LBB93_8
11467 ; RV64ZVE32F-NEXT: j .LBB93_9
11468 %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
11469 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11470 ret <8 x double> %v
11471 }
11473 define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11474 ; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8f64:
11475 ; RV32V: # %bb.0:
11476 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11477 ; RV32V-NEXT: vsll.vi v8, v8, 3
11478 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
11479 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
11480 ; RV32V-NEXT: vmv.v.v v8, v12
11481 ; RV32V-NEXT: ret
11482 ;
11483 ; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8f64:
11484 ; RV64V: # %bb.0:
11485 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
11486 ; RV64V-NEXT: vsext.vf2 v16, v8
11487 ; RV64V-NEXT: vsll.vi v8, v16, 3
11488 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
11489 ; RV64V-NEXT: vmv.v.v v8, v12
11490 ; RV64V-NEXT: ret
11491 ;
11492 ; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
11493 ; RV32ZVE32F: # %bb.0:
11494 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11495 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
11496 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
11497 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11498 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
11499 ; RV32ZVE32F-NEXT: andi a2, a1, 1
11500 ; RV32ZVE32F-NEXT: bnez a2, .LBB94_10
11501 ; RV32ZVE32F-NEXT: # %bb.1: # %else
11502 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11503 ; RV32ZVE32F-NEXT: bnez a2, .LBB94_11
11504 ; RV32ZVE32F-NEXT: .LBB94_2: # %else2
11505 ; RV32ZVE32F-NEXT: andi a2, a1, 4
11506 ; RV32ZVE32F-NEXT: bnez a2, .LBB94_12
11507 ; RV32ZVE32F-NEXT: .LBB94_3: # %else5
11508 ; RV32ZVE32F-NEXT: andi a2, a1, 8
11509 ; RV32ZVE32F-NEXT: bnez a2, .LBB94_13
11510 ; RV32ZVE32F-NEXT: .LBB94_4: # %else8
11511 ; RV32ZVE32F-NEXT: andi a2, a1, 16
11512 ; RV32ZVE32F-NEXT: bnez a2, .LBB94_14
11513 ; RV32ZVE32F-NEXT: .LBB94_5: # %else11
11514 ; RV32ZVE32F-NEXT: andi a2, a1, 32
11515 ; RV32ZVE32F-NEXT: bnez a2, .LBB94_15
11516 ; RV32ZVE32F-NEXT: .LBB94_6: # %else14
11517 ; RV32ZVE32F-NEXT: andi a2, a1, 64
11518 ; RV32ZVE32F-NEXT: bnez a2, .LBB94_16
11519 ; RV32ZVE32F-NEXT: .LBB94_7: # %else17
11520 ; RV32ZVE32F-NEXT: andi a1, a1, -128
11521 ; RV32ZVE32F-NEXT: beqz a1, .LBB94_9
11522 ; RV32ZVE32F-NEXT: .LBB94_8: # %cond.load19
11523 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11524 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11525 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11526 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
11527 ; RV32ZVE32F-NEXT: .LBB94_9: # %else20
11528 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
11529 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
11530 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
11531 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
11532 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
11533 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
11534 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
11535 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
11536 ; RV32ZVE32F-NEXT: ret
11537 ; RV32ZVE32F-NEXT: .LBB94_10: # %cond.load
11538 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
11539 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
11540 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
11541 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11542 ; RV32ZVE32F-NEXT: beqz a2, .LBB94_2
11543 ; RV32ZVE32F-NEXT: .LBB94_11: # %cond.load1
11544 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11545 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
11546 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11547 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
11548 ; RV32ZVE32F-NEXT: andi a2, a1, 4
11549 ; RV32ZVE32F-NEXT: beqz a2, .LBB94_3
11550 ; RV32ZVE32F-NEXT: .LBB94_12: # %cond.load4
11551 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11552 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
11553 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11554 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
11555 ; RV32ZVE32F-NEXT: andi a2, a1, 8
11556 ; RV32ZVE32F-NEXT: beqz a2, .LBB94_4
11557 ; RV32ZVE32F-NEXT: .LBB94_13: # %cond.load7
11558 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11559 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
11560 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11561 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
11562 ; RV32ZVE32F-NEXT: andi a2, a1, 16
11563 ; RV32ZVE32F-NEXT: beqz a2, .LBB94_5
11564 ; RV32ZVE32F-NEXT: .LBB94_14: # %cond.load10
11565 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11566 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
11567 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11568 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
11569 ; RV32ZVE32F-NEXT: andi a2, a1, 32
11570 ; RV32ZVE32F-NEXT: beqz a2, .LBB94_6
11571 ; RV32ZVE32F-NEXT: .LBB94_15: # %cond.load13
11572 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11573 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
11574 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11575 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
11576 ; RV32ZVE32F-NEXT: andi a2, a1, 64
11577 ; RV32ZVE32F-NEXT: beqz a2, .LBB94_7
11578 ; RV32ZVE32F-NEXT: .LBB94_16: # %cond.load16
11579 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11580 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
11581 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11582 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
11583 ; RV32ZVE32F-NEXT: andi a1, a1, -128
11584 ; RV32ZVE32F-NEXT: bnez a1, .LBB94_8
11585 ; RV32ZVE32F-NEXT: j .LBB94_9
11586 ;
11587 ; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
11588 ; RV64ZVE32F: # %bb.0:
11589 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11590 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
11591 ; RV64ZVE32F-NEXT: andi a3, a2, 1
11592 ; RV64ZVE32F-NEXT: beqz a3, .LBB94_2
11593 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
11594 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
11595 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11596 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11597 ; RV64ZVE32F-NEXT: add a3, a1, a3
11598 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
11599 ; RV64ZVE32F-NEXT: .LBB94_2: # %else
11600 ; RV64ZVE32F-NEXT: andi a3, a2, 2
11601 ; RV64ZVE32F-NEXT: beqz a3, .LBB94_4
11602 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
11603 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11604 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
11605 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
11606 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11607 ; RV64ZVE32F-NEXT: add a3, a1, a3
11608 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
11609 ; RV64ZVE32F-NEXT: .LBB94_4: # %else2
11610 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
11611 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
11612 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
11613 ; RV64ZVE32F-NEXT: andi a3, a2, 4
11614 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
11615 ; RV64ZVE32F-NEXT: bnez a3, .LBB94_14
11616 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
11617 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11618 ; RV64ZVE32F-NEXT: bnez a3, .LBB94_15
11619 ; RV64ZVE32F-NEXT: .LBB94_6: # %else8
11620 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11621 ; RV64ZVE32F-NEXT: bnez a3, .LBB94_16
11622 ; RV64ZVE32F-NEXT: .LBB94_7: # %else11
11623 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11624 ; RV64ZVE32F-NEXT: beqz a3, .LBB94_9
11625 ; RV64ZVE32F-NEXT: .LBB94_8: # %cond.load13
11626 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
11627 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11628 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11629 ; RV64ZVE32F-NEXT: add a3, a1, a3
11630 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
11631 ; RV64ZVE32F-NEXT: .LBB94_9: # %else14
11632 ; RV64ZVE32F-NEXT: andi a3, a2, 64
11633 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
11634 ; RV64ZVE32F-NEXT: beqz a3, .LBB94_11
11635 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
11636 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11637 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11638 ; RV64ZVE32F-NEXT: add a3, a1, a3
11639 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
11640 ; RV64ZVE32F-NEXT: .LBB94_11: # %else17
11641 ; RV64ZVE32F-NEXT: andi a2, a2, -128
11642 ; RV64ZVE32F-NEXT: beqz a2, .LBB94_13
11643 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
11644 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11645 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
11646 ; RV64ZVE32F-NEXT: slli a2, a2, 3
11647 ; RV64ZVE32F-NEXT: add a1, a1, a2
11648 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
11649 ; RV64ZVE32F-NEXT: .LBB94_13: # %else20
11650 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
11651 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
11652 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
11653 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
11654 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
11655 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
11656 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
11657 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
11658 ; RV64ZVE32F-NEXT: ret
11659 ; RV64ZVE32F-NEXT: .LBB94_14: # %cond.load4
11660 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11661 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11662 ; RV64ZVE32F-NEXT: add a3, a1, a3
11663 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
11664 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11665 ; RV64ZVE32F-NEXT: beqz a3, .LBB94_6
11666 ; RV64ZVE32F-NEXT: .LBB94_15: # %cond.load7
11667 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11668 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11669 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11670 ; RV64ZVE32F-NEXT: add a3, a1, a3
11671 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
11672 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11673 ; RV64ZVE32F-NEXT: beqz a3, .LBB94_7
11674 ; RV64ZVE32F-NEXT: .LBB94_16: # %cond.load10
11675 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
11676 ; RV64ZVE32F-NEXT: slli a3, a3, 3
11677 ; RV64ZVE32F-NEXT: add a3, a1, a3
11678 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
11679 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11680 ; RV64ZVE32F-NEXT: bnez a3, .LBB94_8
11681 ; RV64ZVE32F-NEXT: j .LBB94_9
11682 %eidxs = sext <8 x i32> %idxs to <8 x i64>
11683 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
11684 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11685 ret <8 x double> %v
11686 }
11688 define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11689 ; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8f64:
11690 ; RV32V: # %bb.0:
11691 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11692 ; RV32V-NEXT: vsll.vi v8, v8, 3
11693 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
11694 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
11695 ; RV32V-NEXT: vmv.v.v v8, v12
11696 ; RV32V-NEXT: ret
11697 ;
11698 ; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8f64:
11699 ; RV64V: # %bb.0:
11700 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
11701 ; RV64V-NEXT: vzext.vf2 v16, v8
11702 ; RV64V-NEXT: vsll.vi v8, v16, 3
11703 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
11704 ; RV64V-NEXT: vmv.v.v v8, v12
11705 ; RV64V-NEXT: ret
11706 ;
11707 ; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
11708 ; RV32ZVE32F: # %bb.0:
11709 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11710 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
11711 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
11712 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11713 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
11714 ; RV32ZVE32F-NEXT: andi a2, a1, 1
11715 ; RV32ZVE32F-NEXT: bnez a2, .LBB95_10
11716 ; RV32ZVE32F-NEXT: # %bb.1: # %else
11717 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11718 ; RV32ZVE32F-NEXT: bnez a2, .LBB95_11
11719 ; RV32ZVE32F-NEXT: .LBB95_2: # %else2
11720 ; RV32ZVE32F-NEXT: andi a2, a1, 4
11721 ; RV32ZVE32F-NEXT: bnez a2, .LBB95_12
11722 ; RV32ZVE32F-NEXT: .LBB95_3: # %else5
11723 ; RV32ZVE32F-NEXT: andi a2, a1, 8
11724 ; RV32ZVE32F-NEXT: bnez a2, .LBB95_13
11725 ; RV32ZVE32F-NEXT: .LBB95_4: # %else8
11726 ; RV32ZVE32F-NEXT: andi a2, a1, 16
11727 ; RV32ZVE32F-NEXT: bnez a2, .LBB95_14
11728 ; RV32ZVE32F-NEXT: .LBB95_5: # %else11
11729 ; RV32ZVE32F-NEXT: andi a2, a1, 32
11730 ; RV32ZVE32F-NEXT: bnez a2, .LBB95_15
11731 ; RV32ZVE32F-NEXT: .LBB95_6: # %else14
11732 ; RV32ZVE32F-NEXT: andi a2, a1, 64
11733 ; RV32ZVE32F-NEXT: bnez a2, .LBB95_16
11734 ; RV32ZVE32F-NEXT: .LBB95_7: # %else17
11735 ; RV32ZVE32F-NEXT: andi a1, a1, -128
11736 ; RV32ZVE32F-NEXT: beqz a1, .LBB95_9
11737 ; RV32ZVE32F-NEXT: .LBB95_8: # %cond.load19
11738 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11739 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11740 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11741 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
11742 ; RV32ZVE32F-NEXT: .LBB95_9: # %else20
11743 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
11744 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
11745 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
11746 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
11747 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
11748 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
11749 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
11750 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
11751 ; RV32ZVE32F-NEXT: ret
11752 ; RV32ZVE32F-NEXT: .LBB95_10: # %cond.load
11753 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
11754 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
11755 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
11756 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11757 ; RV32ZVE32F-NEXT: beqz a2, .LBB95_2
11758 ; RV32ZVE32F-NEXT: .LBB95_11: # %cond.load1
11759 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11760 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
11761 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11762 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
11763 ; RV32ZVE32F-NEXT: andi a2, a1, 4
11764 ; RV32ZVE32F-NEXT: beqz a2, .LBB95_3
11765 ; RV32ZVE32F-NEXT: .LBB95_12: # %cond.load4
11766 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11767 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
11768 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11769 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
11770 ; RV32ZVE32F-NEXT: andi a2, a1, 8
11771 ; RV32ZVE32F-NEXT: beqz a2, .LBB95_4
11772 ; RV32ZVE32F-NEXT: .LBB95_13: # %cond.load7
11773 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11774 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
11775 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11776 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
11777 ; RV32ZVE32F-NEXT: andi a2, a1, 16
11778 ; RV32ZVE32F-NEXT: beqz a2, .LBB95_5
11779 ; RV32ZVE32F-NEXT: .LBB95_14: # %cond.load10
11780 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11781 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
11782 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11783 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
11784 ; RV32ZVE32F-NEXT: andi a2, a1, 32
11785 ; RV32ZVE32F-NEXT: beqz a2, .LBB95_6
11786 ; RV32ZVE32F-NEXT: .LBB95_15: # %cond.load13
11787 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11788 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
11789 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11790 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
11791 ; RV32ZVE32F-NEXT: andi a2, a1, 64
11792 ; RV32ZVE32F-NEXT: beqz a2, .LBB95_7
11793 ; RV32ZVE32F-NEXT: .LBB95_16: # %cond.load16
11794 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11795 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
11796 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
11797 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
11798 ; RV32ZVE32F-NEXT: andi a1, a1, -128
11799 ; RV32ZVE32F-NEXT: bnez a1, .LBB95_8
11800 ; RV32ZVE32F-NEXT: j .LBB95_9
11801 ;
11802 ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
11803 ; RV64ZVE32F: # %bb.0:
11804 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11805 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0
11806 ; RV64ZVE32F-NEXT: andi a3, a2, 1
11807 ; RV64ZVE32F-NEXT: beqz a3, .LBB95_2
11808 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
11809 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
11810 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11811 ; RV64ZVE32F-NEXT: slli a3, a3, 32
11812 ; RV64ZVE32F-NEXT: srli a3, a3, 29
11813 ; RV64ZVE32F-NEXT: add a3, a1, a3
11814 ; RV64ZVE32F-NEXT: fld fa0, 0(a3)
11815 ; RV64ZVE32F-NEXT: .LBB95_2: # %else
11816 ; RV64ZVE32F-NEXT: andi a3, a2, 2
11817 ; RV64ZVE32F-NEXT: beqz a3, .LBB95_4
11818 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
11819 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11820 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
11821 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
11822 ; RV64ZVE32F-NEXT: slli a3, a3, 32
11823 ; RV64ZVE32F-NEXT: srli a3, a3, 29
11824 ; RV64ZVE32F-NEXT: add a3, a1, a3
11825 ; RV64ZVE32F-NEXT: fld fa1, 0(a3)
11826 ; RV64ZVE32F-NEXT: .LBB95_4: # %else2
11827 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
11828 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
11829 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
11830 ; RV64ZVE32F-NEXT: andi a3, a2, 4
11831 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
11832 ; RV64ZVE32F-NEXT: bnez a3, .LBB95_14
11833 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
11834 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11835 ; RV64ZVE32F-NEXT: bnez a3, .LBB95_15
11836 ; RV64ZVE32F-NEXT: .LBB95_6: # %else8
11837 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11838 ; RV64ZVE32F-NEXT: bnez a3, .LBB95_16
11839 ; RV64ZVE32F-NEXT: .LBB95_7: # %else11
11840 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11841 ; RV64ZVE32F-NEXT: beqz a3, .LBB95_9
11842 ; RV64ZVE32F-NEXT: .LBB95_8: # %cond.load13
11843 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
11844 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11845 ; RV64ZVE32F-NEXT: slli a3, a3, 32
11846 ; RV64ZVE32F-NEXT: srli a3, a3, 29
11847 ; RV64ZVE32F-NEXT: add a3, a1, a3
11848 ; RV64ZVE32F-NEXT: fld fa5, 0(a3)
11849 ; RV64ZVE32F-NEXT: .LBB95_9: # %else14
11850 ; RV64ZVE32F-NEXT: andi a3, a2, 64
11851 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
11852 ; RV64ZVE32F-NEXT: beqz a3, .LBB95_11
11853 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
11854 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11855 ; RV64ZVE32F-NEXT: slli a3, a3, 32
11856 ; RV64ZVE32F-NEXT: srli a3, a3, 29
11857 ; RV64ZVE32F-NEXT: add a3, a1, a3
11858 ; RV64ZVE32F-NEXT: fld fa6, 0(a3)
11859 ; RV64ZVE32F-NEXT: .LBB95_11: # %else17
11860 ; RV64ZVE32F-NEXT: andi a2, a2, -128
11861 ; RV64ZVE32F-NEXT: beqz a2, .LBB95_13
11862 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
11863 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11864 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
11865 ; RV64ZVE32F-NEXT: slli a2, a2, 32
11866 ; RV64ZVE32F-NEXT: srli a2, a2, 29
11867 ; RV64ZVE32F-NEXT: add a1, a1, a2
11868 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
11869 ; RV64ZVE32F-NEXT: .LBB95_13: # %else20
11870 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
11871 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
11872 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
11873 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
11874 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
11875 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
11876 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
11877 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
11878 ; RV64ZVE32F-NEXT: ret
11879 ; RV64ZVE32F-NEXT: .LBB95_14: # %cond.load4
11880 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11881 ; RV64ZVE32F-NEXT: slli a3, a3, 32
11882 ; RV64ZVE32F-NEXT: srli a3, a3, 29
11883 ; RV64ZVE32F-NEXT: add a3, a1, a3
11884 ; RV64ZVE32F-NEXT: fld fa2, 0(a3)
11885 ; RV64ZVE32F-NEXT: andi a3, a2, 8
11886 ; RV64ZVE32F-NEXT: beqz a3, .LBB95_6
11887 ; RV64ZVE32F-NEXT: .LBB95_15: # %cond.load7
11888 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
11889 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8
11890 ; RV64ZVE32F-NEXT: slli a3, a3, 32
11891 ; RV64ZVE32F-NEXT: srli a3, a3, 29
11892 ; RV64ZVE32F-NEXT: add a3, a1, a3
11893 ; RV64ZVE32F-NEXT: fld fa3, 0(a3)
11894 ; RV64ZVE32F-NEXT: andi a3, a2, 16
11895 ; RV64ZVE32F-NEXT: beqz a3, .LBB95_7
11896 ; RV64ZVE32F-NEXT: .LBB95_16: # %cond.load10
11897 ; RV64ZVE32F-NEXT: vmv.x.s a3, v10
11898 ; RV64ZVE32F-NEXT: slli a3, a3, 32
11899 ; RV64ZVE32F-NEXT: srli a3, a3, 29
11900 ; RV64ZVE32F-NEXT: add a3, a1, a3
11901 ; RV64ZVE32F-NEXT: fld fa4, 0(a3)
11902 ; RV64ZVE32F-NEXT: andi a3, a2, 32
11903 ; RV64ZVE32F-NEXT: bnez a3, .LBB95_8
11904 ; RV64ZVE32F-NEXT: j .LBB95_9
11905 %eidxs = zext <8 x i32> %idxs to <8 x i64>
11906 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
11907 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
11908 ret <8 x double> %v
11909 }
11911 define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x double> %passthru) {
11912 ; RV32V-LABEL: mgather_baseidx_v8f64:
11913 ; RV32V: # %bb.0:
11914 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11915 ; RV32V-NEXT: vnsrl.wi v16, v8, 0
11916 ; RV32V-NEXT: vsll.vi v8, v16, 3
11917 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
11918 ; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
11919 ; RV32V-NEXT: vmv.v.v v8, v12
11920 ; RV32V-NEXT: ret
11921 ;
11922 ; RV64V-LABEL: mgather_baseidx_v8f64:
11923 ; RV64V: # %bb.0:
11924 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
11925 ; RV64V-NEXT: vsll.vi v8, v8, 3
11926 ; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
11927 ; RV64V-NEXT: vmv.v.v v8, v12
11928 ; RV64V-NEXT: ret
11929 ;
11930 ; RV32ZVE32F-LABEL: mgather_baseidx_v8f64:
11931 ; RV32ZVE32F: # %bb.0:
11932 ; RV32ZVE32F-NEXT: lw a3, 56(a2)
11933 ; RV32ZVE32F-NEXT: lw a4, 48(a2)
11934 ; RV32ZVE32F-NEXT: lw a5, 40(a2)
11935 ; RV32ZVE32F-NEXT: lw a6, 32(a2)
11936 ; RV32ZVE32F-NEXT: lw a7, 24(a2)
11937 ; RV32ZVE32F-NEXT: lw t0, 0(a2)
11938 ; RV32ZVE32F-NEXT: lw t1, 8(a2)
11939 ; RV32ZVE32F-NEXT: lw a2, 16(a2)
11940 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
11941 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
11942 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
11943 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
11944 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
11945 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
11946 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
11947 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
11948 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
11949 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
11950 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
11951 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
11952 ; RV32ZVE32F-NEXT: vmv.x.s a1, v0
11953 ; RV32ZVE32F-NEXT: andi a2, a1, 1
11954 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_10
11955 ; RV32ZVE32F-NEXT: # %bb.1: # %else
11956 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11957 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_11
11958 ; RV32ZVE32F-NEXT: .LBB96_2: # %else2
11959 ; RV32ZVE32F-NEXT: andi a2, a1, 4
11960 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_12
11961 ; RV32ZVE32F-NEXT: .LBB96_3: # %else5
11962 ; RV32ZVE32F-NEXT: andi a2, a1, 8
11963 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_13
11964 ; RV32ZVE32F-NEXT: .LBB96_4: # %else8
11965 ; RV32ZVE32F-NEXT: andi a2, a1, 16
11966 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_14
11967 ; RV32ZVE32F-NEXT: .LBB96_5: # %else11
11968 ; RV32ZVE32F-NEXT: andi a2, a1, 32
11969 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_15
11970 ; RV32ZVE32F-NEXT: .LBB96_6: # %else14
11971 ; RV32ZVE32F-NEXT: andi a2, a1, 64
11972 ; RV32ZVE32F-NEXT: bnez a2, .LBB96_16
11973 ; RV32ZVE32F-NEXT: .LBB96_7: # %else17
11974 ; RV32ZVE32F-NEXT: andi a1, a1, -128
11975 ; RV32ZVE32F-NEXT: beqz a1, .LBB96_9
11976 ; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19
11977 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
11978 ; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
11979 ; RV32ZVE32F-NEXT: vmv.x.s a1, v8
11980 ; RV32ZVE32F-NEXT: fld fa7, 0(a1)
11981 ; RV32ZVE32F-NEXT: .LBB96_9: # %else20
11982 ; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
11983 ; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
11984 ; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
11985 ; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
11986 ; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
11987 ; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
11988 ; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
11989 ; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
11990 ; RV32ZVE32F-NEXT: ret
11991 ; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load
11992 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
11993 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8
11994 ; RV32ZVE32F-NEXT: fld fa0, 0(a2)
11995 ; RV32ZVE32F-NEXT: andi a2, a1, 2
11996 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_2
11997 ; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1
11998 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
11999 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
12000 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
12001 ; RV32ZVE32F-NEXT: fld fa1, 0(a2)
12002 ; RV32ZVE32F-NEXT: andi a2, a1, 4
12003 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_3
12004 ; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4
12005 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12006 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
12007 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
12008 ; RV32ZVE32F-NEXT: fld fa2, 0(a2)
12009 ; RV32ZVE32F-NEXT: andi a2, a1, 8
12010 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_4
12011 ; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7
12012 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12013 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
12014 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
12015 ; RV32ZVE32F-NEXT: fld fa3, 0(a2)
12016 ; RV32ZVE32F-NEXT: andi a2, a1, 16
12017 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_5
12018 ; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10
12019 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12020 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
12021 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
12022 ; RV32ZVE32F-NEXT: fld fa4, 0(a2)
12023 ; RV32ZVE32F-NEXT: andi a2, a1, 32
12024 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_6
12025 ; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13
12026 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12027 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
12028 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
12029 ; RV32ZVE32F-NEXT: fld fa5, 0(a2)
12030 ; RV32ZVE32F-NEXT: andi a2, a1, 64
12031 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_7
12032 ; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16
12033 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
12034 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
12035 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10
12036 ; RV32ZVE32F-NEXT: fld fa6, 0(a2)
12037 ; RV32ZVE32F-NEXT: andi a1, a1, -128
12038 ; RV32ZVE32F-NEXT: bnez a1, .LBB96_8
12039 ; RV32ZVE32F-NEXT: j .LBB96_9
12040 ;
12041 ; RV64ZVE32F-LABEL: mgather_baseidx_v8f64:
12042 ; RV64ZVE32F: # %bb.0:
12043 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12044 ; RV64ZVE32F-NEXT: vmv.x.s a3, v0
12045 ; RV64ZVE32F-NEXT: andi a4, a3, 1
12046 ; RV64ZVE32F-NEXT: bnez a4, .LBB96_10
12047 ; RV64ZVE32F-NEXT: # %bb.1: # %else
12048 ; RV64ZVE32F-NEXT: andi a4, a3, 2
12049 ; RV64ZVE32F-NEXT: bnez a4, .LBB96_11
12050 ; RV64ZVE32F-NEXT: .LBB96_2: # %else2
12051 ; RV64ZVE32F-NEXT: andi a4, a3, 4
12052 ; RV64ZVE32F-NEXT: bnez a4, .LBB96_12
12053 ; RV64ZVE32F-NEXT: .LBB96_3: # %else5
12054 ; RV64ZVE32F-NEXT: andi a4, a3, 8
12055 ; RV64ZVE32F-NEXT: bnez a4, .LBB96_13
12056 ; RV64ZVE32F-NEXT: .LBB96_4: # %else8
12057 ; RV64ZVE32F-NEXT: andi a4, a3, 16
12058 ; RV64ZVE32F-NEXT: bnez a4, .LBB96_14
12059 ; RV64ZVE32F-NEXT: .LBB96_5: # %else11
12060 ; RV64ZVE32F-NEXT: andi a4, a3, 32
12061 ; RV64ZVE32F-NEXT: bnez a4, .LBB96_15
12062 ; RV64ZVE32F-NEXT: .LBB96_6: # %else14
12063 ; RV64ZVE32F-NEXT: andi a4, a3, 64
12064 ; RV64ZVE32F-NEXT: bnez a4, .LBB96_16
12065 ; RV64ZVE32F-NEXT: .LBB96_7: # %else17
12066 ; RV64ZVE32F-NEXT: andi a3, a3, -128
12067 ; RV64ZVE32F-NEXT: beqz a3, .LBB96_9
12068 ; RV64ZVE32F-NEXT: .LBB96_8: # %cond.load19
12069 ; RV64ZVE32F-NEXT: ld a2, 56(a2)
12070 ; RV64ZVE32F-NEXT: slli a2, a2, 3
12071 ; RV64ZVE32F-NEXT: add a1, a1, a2
12072 ; RV64ZVE32F-NEXT: fld fa7, 0(a1)
12073 ; RV64ZVE32F-NEXT: .LBB96_9: # %else20
12074 ; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
12075 ; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
12076 ; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
12077 ; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
12078 ; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
12079 ; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
12080 ; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
12081 ; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
12082 ; RV64ZVE32F-NEXT: ret
12083 ; RV64ZVE32F-NEXT: .LBB96_10: # %cond.load
12084 ; RV64ZVE32F-NEXT: ld a4, 0(a2)
12085 ; RV64ZVE32F-NEXT: slli a4, a4, 3
12086 ; RV64ZVE32F-NEXT: add a4, a1, a4
12087 ; RV64ZVE32F-NEXT: fld fa0, 0(a4)
12088 ; RV64ZVE32F-NEXT: andi a4, a3, 2
12089 ; RV64ZVE32F-NEXT: beqz a4, .LBB96_2
12090 ; RV64ZVE32F-NEXT: .LBB96_11: # %cond.load1
12091 ; RV64ZVE32F-NEXT: ld a4, 8(a2)
12092 ; RV64ZVE32F-NEXT: slli a4, a4, 3
12093 ; RV64ZVE32F-NEXT: add a4, a1, a4
12094 ; RV64ZVE32F-NEXT: fld fa1, 0(a4)
12095 ; RV64ZVE32F-NEXT: andi a4, a3, 4
12096 ; RV64ZVE32F-NEXT: beqz a4, .LBB96_3
12097 ; RV64ZVE32F-NEXT: .LBB96_12: # %cond.load4
12098 ; RV64ZVE32F-NEXT: ld a4, 16(a2)
12099 ; RV64ZVE32F-NEXT: slli a4, a4, 3
12100 ; RV64ZVE32F-NEXT: add a4, a1, a4
12101 ; RV64ZVE32F-NEXT: fld fa2, 0(a4)
12102 ; RV64ZVE32F-NEXT: andi a4, a3, 8
12103 ; RV64ZVE32F-NEXT: beqz a4, .LBB96_4
12104 ; RV64ZVE32F-NEXT: .LBB96_13: # %cond.load7
12105 ; RV64ZVE32F-NEXT: ld a4, 24(a2)
12106 ; RV64ZVE32F-NEXT: slli a4, a4, 3
12107 ; RV64ZVE32F-NEXT: add a4, a1, a4
12108 ; RV64ZVE32F-NEXT: fld fa3, 0(a4)
12109 ; RV64ZVE32F-NEXT: andi a4, a3, 16
12110 ; RV64ZVE32F-NEXT: beqz a4, .LBB96_5
12111 ; RV64ZVE32F-NEXT: .LBB96_14: # %cond.load10
12112 ; RV64ZVE32F-NEXT: ld a4, 32(a2)
12113 ; RV64ZVE32F-NEXT: slli a4, a4, 3
12114 ; RV64ZVE32F-NEXT: add a4, a1, a4
12115 ; RV64ZVE32F-NEXT: fld fa4, 0(a4)
12116 ; RV64ZVE32F-NEXT: andi a4, a3, 32
12117 ; RV64ZVE32F-NEXT: beqz a4, .LBB96_6
12118 ; RV64ZVE32F-NEXT: .LBB96_15: # %cond.load13
12119 ; RV64ZVE32F-NEXT: ld a4, 40(a2)
12120 ; RV64ZVE32F-NEXT: slli a4, a4, 3
12121 ; RV64ZVE32F-NEXT: add a4, a1, a4
12122 ; RV64ZVE32F-NEXT: fld fa5, 0(a4)
12123 ; RV64ZVE32F-NEXT: andi a4, a3, 64
12124 ; RV64ZVE32F-NEXT: beqz a4, .LBB96_7
12125 ; RV64ZVE32F-NEXT: .LBB96_16: # %cond.load16
12126 ; RV64ZVE32F-NEXT: ld a4, 48(a2)
12127 ; RV64ZVE32F-NEXT: slli a4, a4, 3
12128 ; RV64ZVE32F-NEXT: add a4, a1, a4
12129 ; RV64ZVE32F-NEXT: fld fa6, 0(a4)
12130 ; RV64ZVE32F-NEXT: andi a3, a3, -128
12131 ; RV64ZVE32F-NEXT: bnez a3, .LBB96_8
12132 ; RV64ZVE32F-NEXT: j .LBB96_9
12133 %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
12134 %v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
12135 ret <8 x double> %v
12136 }
12138 declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
12140 define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m, <16 x i8> %passthru) {
12141 ; RV32-LABEL: mgather_baseidx_v16i8:
12142 ; RV32: # %bb.0:
12143 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
12144 ; RV32-NEXT: vsext.vf4 v12, v8
12145 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
12146 ; RV32-NEXT: vluxei32.v v9, (a0), v12, v0.t
12147 ; RV32-NEXT: vmv.v.v v8, v9
12148 ; RV32-NEXT: ret
12149 ;
12150 ; RV64V-LABEL: mgather_baseidx_v16i8:
12151 ; RV64V: # %bb.0:
12152 ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
12153 ; RV64V-NEXT: vsext.vf8 v16, v8
12154 ; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu
12155 ; RV64V-NEXT: vluxei64.v v9, (a0), v16, v0.t
12156 ; RV64V-NEXT: vmv.v.v v8, v9
12157 ; RV64V-NEXT: ret
12158 ;
12159 ; RV64ZVE32F-LABEL: mgather_baseidx_v16i8:
12160 ; RV64ZVE32F: # %bb.0:
12161 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
12162 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
12163 ; RV64ZVE32F-NEXT: andi a2, a1, 1
12164 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_2
12165 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
12166 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, mf4, tu, ma
12167 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12168 ; RV64ZVE32F-NEXT: add a2, a0, a2
12169 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12170 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
12171 ; RV64ZVE32F-NEXT: .LBB97_2: # %else
12172 ; RV64ZVE32F-NEXT: andi a2, a1, 2
12173 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_4
12174 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
12175 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12176 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
12177 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
12178 ; RV64ZVE32F-NEXT: add a2, a0, a2
12179 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12180 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
12181 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
12182 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
12183 ; RV64ZVE32F-NEXT: .LBB97_4: # %else2
12184 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
12185 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
12186 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12187 ; RV64ZVE32F-NEXT: andi a2, a1, 4
12188 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
12189 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_25
12190 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
12191 ; RV64ZVE32F-NEXT: andi a2, a1, 8
12192 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_26
12193 ; RV64ZVE32F-NEXT: .LBB97_6: # %else8
12194 ; RV64ZVE32F-NEXT: andi a2, a1, 16
12195 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_8
12196 ; RV64ZVE32F-NEXT: .LBB97_7: # %cond.load10
12197 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
12198 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
12199 ; RV64ZVE32F-NEXT: add a2, a0, a2
12200 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12201 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
12202 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 4
12203 ; RV64ZVE32F-NEXT: .LBB97_8: # %else11
12204 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
12205 ; RV64ZVE32F-NEXT: andi a2, a1, 32
12206 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
12207 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_10
12208 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
12209 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12210 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
12211 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
12212 ; RV64ZVE32F-NEXT: add a2, a0, a2
12213 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12214 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
12215 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma
12216 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 5
12217 ; RV64ZVE32F-NEXT: .LBB97_10: # %else14
12218 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12219 ; RV64ZVE32F-NEXT: andi a2, a1, 64
12220 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
12221 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_27
12222 ; RV64ZVE32F-NEXT: # %bb.11: # %else17
12223 ; RV64ZVE32F-NEXT: andi a2, a1, 128
12224 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_28
12225 ; RV64ZVE32F-NEXT: .LBB97_12: # %else20
12226 ; RV64ZVE32F-NEXT: andi a2, a1, 256
12227 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_29
12228 ; RV64ZVE32F-NEXT: .LBB97_13: # %else23
12229 ; RV64ZVE32F-NEXT: andi a2, a1, 512
12230 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_15
12231 ; RV64ZVE32F-NEXT: .LBB97_14: # %cond.load25
12232 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12233 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
12234 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
12235 ; RV64ZVE32F-NEXT: add a2, a0, a2
12236 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12237 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
12238 ; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma
12239 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 9
12240 ; RV64ZVE32F-NEXT: .LBB97_15: # %else26
12241 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
12242 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
12243 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12244 ; RV64ZVE32F-NEXT: andi a2, a1, 1024
12245 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
12246 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_30
12247 ; RV64ZVE32F-NEXT: # %bb.16: # %else29
12248 ; RV64ZVE32F-NEXT: slli a2, a1, 52
12249 ; RV64ZVE32F-NEXT: bltz a2, .LBB97_31
12250 ; RV64ZVE32F-NEXT: .LBB97_17: # %else32
12251 ; RV64ZVE32F-NEXT: slli a2, a1, 51
12252 ; RV64ZVE32F-NEXT: bltz a2, .LBB97_32
12253 ; RV64ZVE32F-NEXT: .LBB97_18: # %else35
12254 ; RV64ZVE32F-NEXT: slli a2, a1, 50
12255 ; RV64ZVE32F-NEXT: bgez a2, .LBB97_20
12256 ; RV64ZVE32F-NEXT: .LBB97_19: # %cond.load37
12257 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12258 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
12259 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12260 ; RV64ZVE32F-NEXT: add a2, a0, a2
12261 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12262 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
12263 ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma
12264 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 13
12265 ; RV64ZVE32F-NEXT: .LBB97_20: # %else38
12266 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12267 ; RV64ZVE32F-NEXT: slli a2, a1, 49
12268 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
12269 ; RV64ZVE32F-NEXT: bgez a2, .LBB97_22
12270 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.load40
12271 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12272 ; RV64ZVE32F-NEXT: add a2, a0, a2
12273 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12274 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
12275 ; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
12276 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14
12277 ; RV64ZVE32F-NEXT: .LBB97_22: # %else41
12278 ; RV64ZVE32F-NEXT: lui a2, 1048568
12279 ; RV64ZVE32F-NEXT: and a1, a1, a2
12280 ; RV64ZVE32F-NEXT: beqz a1, .LBB97_24
12281 ; RV64ZVE32F-NEXT: # %bb.23: # %cond.load43
12282 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12283 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12284 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
12285 ; RV64ZVE32F-NEXT: add a0, a0, a1
12286 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
12287 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
12288 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma
12289 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15
12290 ; RV64ZVE32F-NEXT: .LBB97_24: # %else44
12291 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9
12292 ; RV64ZVE32F-NEXT: ret
12293 ; RV64ZVE32F-NEXT: .LBB97_25: # %cond.load4
12294 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
12295 ; RV64ZVE32F-NEXT: add a2, a0, a2
12296 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12297 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12298 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
12299 ; RV64ZVE32F-NEXT: vslideup.vi v9, v12, 2
12300 ; RV64ZVE32F-NEXT: andi a2, a1, 8
12301 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_6
12302 ; RV64ZVE32F-NEXT: .LBB97_26: # %cond.load7
12303 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12304 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
12305 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
12306 ; RV64ZVE32F-NEXT: add a2, a0, a2
12307 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12308 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
12309 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
12310 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3
12311 ; RV64ZVE32F-NEXT: andi a2, a1, 16
12312 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_7
12313 ; RV64ZVE32F-NEXT: j .LBB97_8
12314 ; RV64ZVE32F-NEXT: .LBB97_27: # %cond.load16
12315 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
12316 ; RV64ZVE32F-NEXT: add a2, a0, a2
12317 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12318 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
12319 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
12320 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6
12321 ; RV64ZVE32F-NEXT: andi a2, a1, 128
12322 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_12
12323 ; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load19
12324 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12325 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
12326 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
12327 ; RV64ZVE32F-NEXT: add a2, a0, a2
12328 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12329 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
12330 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
12331 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7
12332 ; RV64ZVE32F-NEXT: andi a2, a1, 256
12333 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_13
12334 ; RV64ZVE32F-NEXT: .LBB97_29: # %cond.load22
12335 ; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
12336 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12337 ; RV64ZVE32F-NEXT: add a2, a0, a2
12338 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12339 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
12340 ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 8
12341 ; RV64ZVE32F-NEXT: andi a2, a1, 512
12342 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
12343 ; RV64ZVE32F-NEXT: j .LBB97_15
12344 ; RV64ZVE32F-NEXT: .LBB97_30: # %cond.load28
12345 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12346 ; RV64ZVE32F-NEXT: add a2, a0, a2
12347 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12348 ; RV64ZVE32F-NEXT: vmv.s.x v11, a2
12349 ; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
12350 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10
12351 ; RV64ZVE32F-NEXT: slli a2, a1, 52
12352 ; RV64ZVE32F-NEXT: bgez a2, .LBB97_17
12353 ; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load31
12354 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12355 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12356 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12357 ; RV64ZVE32F-NEXT: add a2, a0, a2
12358 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12359 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
12360 ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
12361 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 11
12362 ; RV64ZVE32F-NEXT: slli a2, a1, 51
12363 ; RV64ZVE32F-NEXT: bgez a2, .LBB97_18
12364 ; RV64ZVE32F-NEXT: .LBB97_32: # %cond.load34
12365 ; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
12366 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
12367 ; RV64ZVE32F-NEXT: add a2, a0, a2
12368 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12369 ; RV64ZVE32F-NEXT: vmv.s.x v8, a2
12370 ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 12
12371 ; RV64ZVE32F-NEXT: slli a2, a1, 50
12372 ; RV64ZVE32F-NEXT: bltz a2, .LBB97_19
12373 ; RV64ZVE32F-NEXT: j .LBB97_20
12374 %ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
12375 %v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru)
12376 ret <16 x i8> %v
12377 }
12379 declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>)
12381 define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m, <32 x i8> %passthru) {
12382 ; RV32-LABEL: mgather_baseidx_v32i8:
12383 ; RV32: # %bb.0:
12384 ; RV32-NEXT: li a1, 32
12385 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
12386 ; RV32-NEXT: vsext.vf4 v16, v8
12387 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu
12388 ; RV32-NEXT: vluxei32.v v10, (a0), v16, v0.t
12389 ; RV32-NEXT: vmv.v.v v8, v10
12390 ; RV32-NEXT: ret
12391 ;
12392 ; RV64V-LABEL: mgather_baseidx_v32i8:
12393 ; RV64V: # %bb.0:
12394 ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
12395 ; RV64V-NEXT: vsext.vf8 v16, v8
12396 ; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu
12397 ; RV64V-NEXT: vmv1r.v v12, v10
12398 ; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t
12399 ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma
12400 ; RV64V-NEXT: vslidedown.vi v10, v10, 16
12401 ; RV64V-NEXT: vslidedown.vi v8, v8, 16
12402 ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
12403 ; RV64V-NEXT: vsext.vf8 v16, v8
12404 ; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12405 ; RV64V-NEXT: vslidedown.vi v0, v0, 2
12406 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu
12407 ; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t
12408 ; RV64V-NEXT: li a0, 32
12409 ; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
12410 ; RV64V-NEXT: vslideup.vi v12, v10, 16
12411 ; RV64V-NEXT: vmv.v.v v8, v12
12412 ; RV64V-NEXT: ret
12413 ;
12414 ; RV64ZVE32F-LABEL: mgather_baseidx_v32i8:
12415 ; RV64ZVE32F: # %bb.0:
12416 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
12417 ; RV64ZVE32F-NEXT: vmv.x.s a1, v0
12418 ; RV64ZVE32F-NEXT: andi a2, a1, 1
12419 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_2
12420 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
12421 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
12422 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12423 ; RV64ZVE32F-NEXT: add a2, a0, a2
12424 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12425 ; RV64ZVE32F-NEXT: li a3, 32
12426 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, tu, ma
12427 ; RV64ZVE32F-NEXT: vmv.s.x v10, a2
12428 ; RV64ZVE32F-NEXT: .LBB98_2: # %else
12429 ; RV64ZVE32F-NEXT: andi a2, a1, 2
12430 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_4
12431 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
12432 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12433 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
12434 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
12435 ; RV64ZVE32F-NEXT: add a2, a0, a2
12436 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12437 ; RV64ZVE32F-NEXT: li a3, 32
12438 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12439 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
12440 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
12441 ; RV64ZVE32F-NEXT: .LBB98_4: # %else2
12442 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
12443 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4
12444 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12445 ; RV64ZVE32F-NEXT: andi a2, a1, 4
12446 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
12447 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_49
12448 ; RV64ZVE32F-NEXT: # %bb.5: # %else5
12449 ; RV64ZVE32F-NEXT: andi a2, a1, 8
12450 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_50
12451 ; RV64ZVE32F-NEXT: .LBB98_6: # %else8
12452 ; RV64ZVE32F-NEXT: andi a2, a1, 16
12453 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_8
12454 ; RV64ZVE32F-NEXT: .LBB98_7: # %cond.load10
12455 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12456 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
12457 ; RV64ZVE32F-NEXT: add a2, a0, a2
12458 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12459 ; RV64ZVE32F-NEXT: li a3, 32
12460 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
12461 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12462 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
12463 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4
12464 ; RV64ZVE32F-NEXT: .LBB98_8: # %else11
12465 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
12466 ; RV64ZVE32F-NEXT: andi a2, a1, 32
12467 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 8
12468 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_10
12469 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
12470 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12471 ; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1
12472 ; RV64ZVE32F-NEXT: vmv.x.s a2, v14
12473 ; RV64ZVE32F-NEXT: add a2, a0, a2
12474 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12475 ; RV64ZVE32F-NEXT: li a3, 32
12476 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2
12477 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma
12478 ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5
12479 ; RV64ZVE32F-NEXT: .LBB98_10: # %else14
12480 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12481 ; RV64ZVE32F-NEXT: andi a2, a1, 64
12482 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
12483 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_51
12484 ; RV64ZVE32F-NEXT: # %bb.11: # %else17
12485 ; RV64ZVE32F-NEXT: andi a2, a1, 128
12486 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_52
12487 ; RV64ZVE32F-NEXT: .LBB98_12: # %else20
12488 ; RV64ZVE32F-NEXT: andi a2, a1, 256
12489 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_53
12490 ; RV64ZVE32F-NEXT: .LBB98_13: # %else23
12491 ; RV64ZVE32F-NEXT: andi a2, a1, 512
12492 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_15
12493 ; RV64ZVE32F-NEXT: .LBB98_14: # %cond.load25
12494 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12495 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
12496 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
12497 ; RV64ZVE32F-NEXT: add a2, a0, a2
12498 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12499 ; RV64ZVE32F-NEXT: li a3, 32
12500 ; RV64ZVE32F-NEXT: vmv.s.x v13, a2
12501 ; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma
12502 ; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 9
12503 ; RV64ZVE32F-NEXT: .LBB98_15: # %else26
12504 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
12505 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4
12506 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12507 ; RV64ZVE32F-NEXT: andi a2, a1, 1024
12508 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
12509 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_17
12510 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28
12511 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
12512 ; RV64ZVE32F-NEXT: add a2, a0, a2
12513 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12514 ; RV64ZVE32F-NEXT: li a3, 32
12515 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2
12516 ; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
12517 ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10
12518 ; RV64ZVE32F-NEXT: .LBB98_17: # %else29
12519 ; RV64ZVE32F-NEXT: slli a2, a1, 52
12520 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_19
12521 ; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31
12522 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12523 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
12524 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
12525 ; RV64ZVE32F-NEXT: add a2, a0, a2
12526 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12527 ; RV64ZVE32F-NEXT: li a3, 32
12528 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12529 ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
12530 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 11
12531 ; RV64ZVE32F-NEXT: .LBB98_19: # %else32
12532 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
12533 ; RV64ZVE32F-NEXT: slli a2, a1, 51
12534 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16
12535 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_21
12536 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34
12537 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
12538 ; RV64ZVE32F-NEXT: add a2, a0, a2
12539 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12540 ; RV64ZVE32F-NEXT: li a3, 32
12541 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
12542 ; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
12543 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 12
12544 ; RV64ZVE32F-NEXT: .LBB98_21: # %else35
12545 ; RV64ZVE32F-NEXT: slli a2, a1, 50
12546 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_23
12547 ; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37
12548 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12549 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 1
12550 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
12551 ; RV64ZVE32F-NEXT: add a2, a0, a2
12552 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12553 ; RV64ZVE32F-NEXT: li a3, 32
12554 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
12555 ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma
12556 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 13
12557 ; RV64ZVE32F-NEXT: .LBB98_23: # %else38
12558 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12559 ; RV64ZVE32F-NEXT: slli a2, a1, 49
12560 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2
12561 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_54
12562 ; RV64ZVE32F-NEXT: # %bb.24: # %else41
12563 ; RV64ZVE32F-NEXT: slli a2, a1, 48
12564 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_55
12565 ; RV64ZVE32F-NEXT: .LBB98_25: # %else44
12566 ; RV64ZVE32F-NEXT: slli a2, a1, 47
12567 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_56
12568 ; RV64ZVE32F-NEXT: .LBB98_26: # %else47
12569 ; RV64ZVE32F-NEXT: slli a2, a1, 46
12570 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_28
12571 ; RV64ZVE32F-NEXT: .LBB98_27: # %cond.load49
12572 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12573 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
12574 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
12575 ; RV64ZVE32F-NEXT: add a2, a0, a2
12576 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12577 ; RV64ZVE32F-NEXT: li a3, 32
12578 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12579 ; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, ma
12580 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17
12581 ; RV64ZVE32F-NEXT: .LBB98_28: # %else50
12582 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
12583 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
12584 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12585 ; RV64ZVE32F-NEXT: slli a2, a1, 45
12586 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
12587 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_57
12588 ; RV64ZVE32F-NEXT: # %bb.29: # %else53
12589 ; RV64ZVE32F-NEXT: slli a2, a1, 44
12590 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_58
12591 ; RV64ZVE32F-NEXT: .LBB98_30: # %else56
12592 ; RV64ZVE32F-NEXT: slli a2, a1, 43
12593 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_32
12594 ; RV64ZVE32F-NEXT: .LBB98_31: # %cond.load58
12595 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12596 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
12597 ; RV64ZVE32F-NEXT: add a2, a0, a2
12598 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12599 ; RV64ZVE32F-NEXT: li a3, 32
12600 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
12601 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12602 ; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, ma
12603 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20
12604 ; RV64ZVE32F-NEXT: .LBB98_32: # %else59
12605 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
12606 ; RV64ZVE32F-NEXT: slli a2, a1, 42
12607 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
12608 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_34
12609 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.load61
12610 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12611 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 1
12612 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
12613 ; RV64ZVE32F-NEXT: add a2, a0, a2
12614 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12615 ; RV64ZVE32F-NEXT: li a3, 32
12616 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12617 ; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma
12618 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21
12619 ; RV64ZVE32F-NEXT: .LBB98_34: # %else62
12620 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12621 ; RV64ZVE32F-NEXT: slli a2, a1, 41
12622 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
12623 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_59
12624 ; RV64ZVE32F-NEXT: # %bb.35: # %else65
12625 ; RV64ZVE32F-NEXT: slli a2, a1, 40
12626 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_60
12627 ; RV64ZVE32F-NEXT: .LBB98_36: # %else68
12628 ; RV64ZVE32F-NEXT: slli a2, a1, 39
12629 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_61
12630 ; RV64ZVE32F-NEXT: .LBB98_37: # %else71
12631 ; RV64ZVE32F-NEXT: slli a2, a1, 38
12632 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_39
12633 ; RV64ZVE32F-NEXT: .LBB98_38: # %cond.load73
12634 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12635 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
12636 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
12637 ; RV64ZVE32F-NEXT: add a2, a0, a2
12638 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12639 ; RV64ZVE32F-NEXT: li a3, 32
12640 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12641 ; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, ma
12642 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25
12643 ; RV64ZVE32F-NEXT: .LBB98_39: # %else74
12644 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
12645 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
12646 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12647 ; RV64ZVE32F-NEXT: slli a2, a1, 37
12648 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
12649 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_62
12650 ; RV64ZVE32F-NEXT: # %bb.40: # %else77
12651 ; RV64ZVE32F-NEXT: slli a2, a1, 36
12652 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_63
12653 ; RV64ZVE32F-NEXT: .LBB98_41: # %else80
12654 ; RV64ZVE32F-NEXT: slli a2, a1, 35
12655 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_64
12656 ; RV64ZVE32F-NEXT: .LBB98_42: # %else83
12657 ; RV64ZVE32F-NEXT: slli a2, a1, 34
12658 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_44
12659 ; RV64ZVE32F-NEXT: .LBB98_43: # %cond.load85
12660 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12661 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
12662 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12663 ; RV64ZVE32F-NEXT: add a2, a0, a2
12664 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12665 ; RV64ZVE32F-NEXT: li a3, 32
12666 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12667 ; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma
12668 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29
12669 ; RV64ZVE32F-NEXT: .LBB98_44: # %else86
12670 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
12671 ; RV64ZVE32F-NEXT: slli a2, a1, 33
12672 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
12673 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_46
12674 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.load88
12675 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12676 ; RV64ZVE32F-NEXT: add a2, a0, a2
12677 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12678 ; RV64ZVE32F-NEXT: li a3, 32
12679 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12680 ; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, ma
12681 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30
12682 ; RV64ZVE32F-NEXT: .LBB98_46: # %else89
12683 ; RV64ZVE32F-NEXT: lui a2, 524288
12684 ; RV64ZVE32F-NEXT: and a1, a1, a2
12685 ; RV64ZVE32F-NEXT: beqz a1, .LBB98_48
12686 ; RV64ZVE32F-NEXT: # %bb.47: # %cond.load91
12687 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12688 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12689 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
12690 ; RV64ZVE32F-NEXT: add a0, a0, a1
12691 ; RV64ZVE32F-NEXT: lbu a0, 0(a0)
12692 ; RV64ZVE32F-NEXT: li a1, 32
12693 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0
12694 ; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m2, ta, ma
12695 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31
12696 ; RV64ZVE32F-NEXT: .LBB98_48: # %else92
12697 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10
12698 ; RV64ZVE32F-NEXT: ret
12699 ; RV64ZVE32F-NEXT: .LBB98_49: # %cond.load4
12700 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
12701 ; RV64ZVE32F-NEXT: add a2, a0, a2
12702 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12703 ; RV64ZVE32F-NEXT: li a3, 32
12704 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2
12705 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
12706 ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2
12707 ; RV64ZVE32F-NEXT: andi a2, a1, 8
12708 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_6
12709 ; RV64ZVE32F-NEXT: .LBB98_50: # %cond.load7
12710 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12711 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
12712 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
12713 ; RV64ZVE32F-NEXT: add a2, a0, a2
12714 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12715 ; RV64ZVE32F-NEXT: li a3, 32
12716 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12717 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
12718 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
12719 ; RV64ZVE32F-NEXT: andi a2, a1, 16
12720 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_7
12721 ; RV64ZVE32F-NEXT: j .LBB98_8
12722 ; RV64ZVE32F-NEXT: .LBB98_51: # %cond.load16
12723 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
12724 ; RV64ZVE32F-NEXT: add a2, a0, a2
12725 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12726 ; RV64ZVE32F-NEXT: li a3, 32
12727 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2
12728 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
12729 ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6
12730 ; RV64ZVE32F-NEXT: andi a2, a1, 128
12731 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_12
12732 ; RV64ZVE32F-NEXT: .LBB98_52: # %cond.load19
12733 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12734 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
12735 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13
12736 ; RV64ZVE32F-NEXT: add a2, a0, a2
12737 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12738 ; RV64ZVE32F-NEXT: li a3, 32
12739 ; RV64ZVE32F-NEXT: vmv.s.x v13, a2
12740 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
12741 ; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 7
12742 ; RV64ZVE32F-NEXT: andi a2, a1, 256
12743 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_13
12744 ; RV64ZVE32F-NEXT: .LBB98_53: # %cond.load22
12745 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12746 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
12747 ; RV64ZVE32F-NEXT: add a2, a0, a2
12748 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12749 ; RV64ZVE32F-NEXT: li a3, 32
12750 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
12751 ; RV64ZVE32F-NEXT: vmv.s.x v13, a2
12752 ; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
12753 ; RV64ZVE32F-NEXT: vslideup.vi v10, v13, 8
12754 ; RV64ZVE32F-NEXT: andi a2, a1, 512
12755 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_14
12756 ; RV64ZVE32F-NEXT: j .LBB98_15
12757 ; RV64ZVE32F-NEXT: .LBB98_54: # %cond.load40
12758 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
12759 ; RV64ZVE32F-NEXT: add a2, a0, a2
12760 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12761 ; RV64ZVE32F-NEXT: li a3, 32
12762 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12763 ; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
12764 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14
12765 ; RV64ZVE32F-NEXT: slli a2, a1, 48
12766 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_25
12767 ; RV64ZVE32F-NEXT: .LBB98_55: # %cond.load43
12768 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12769 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
12770 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
12771 ; RV64ZVE32F-NEXT: add a2, a0, a2
12772 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12773 ; RV64ZVE32F-NEXT: li a3, 32
12774 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
12775 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma
12776 ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 15
12777 ; RV64ZVE32F-NEXT: slli a2, a1, 47
12778 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_26
12779 ; RV64ZVE32F-NEXT: .LBB98_56: # %cond.load46
12780 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12781 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12782 ; RV64ZVE32F-NEXT: add a2, a0, a2
12783 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12784 ; RV64ZVE32F-NEXT: li a3, 32
12785 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
12786 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12787 ; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma
12788 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
12789 ; RV64ZVE32F-NEXT: slli a2, a1, 46
12790 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_27
12791 ; RV64ZVE32F-NEXT: j .LBB98_28
12792 ; RV64ZVE32F-NEXT: .LBB98_57: # %cond.load52
12793 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
12794 ; RV64ZVE32F-NEXT: add a2, a0, a2
12795 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12796 ; RV64ZVE32F-NEXT: li a3, 32
12797 ; RV64ZVE32F-NEXT: vmv.s.x v14, a2
12798 ; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, ma
12799 ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18
12800 ; RV64ZVE32F-NEXT: slli a2, a1, 44
12801 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_30
12802 ; RV64ZVE32F-NEXT: .LBB98_58: # %cond.load55
12803 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12804 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
12805 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12
12806 ; RV64ZVE32F-NEXT: add a2, a0, a2
12807 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12808 ; RV64ZVE32F-NEXT: li a3, 32
12809 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12810 ; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, ma
12811 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 19
12812 ; RV64ZVE32F-NEXT: slli a2, a1, 43
12813 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_31
12814 ; RV64ZVE32F-NEXT: j .LBB98_32
12815 ; RV64ZVE32F-NEXT: .LBB98_59: # %cond.load64
12816 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
12817 ; RV64ZVE32F-NEXT: add a2, a0, a2
12818 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12819 ; RV64ZVE32F-NEXT: li a3, 32
12820 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12821 ; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma
12822 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
12823 ; RV64ZVE32F-NEXT: slli a2, a1, 40
12824 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_36
12825 ; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load67
12826 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12827 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
12828 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
12829 ; RV64ZVE32F-NEXT: add a2, a0, a2
12830 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12831 ; RV64ZVE32F-NEXT: li a3, 32
12832 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12833 ; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, ma
12834 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
12835 ; RV64ZVE32F-NEXT: slli a2, a1, 39
12836 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_37
12837 ; RV64ZVE32F-NEXT: .LBB98_61: # %cond.load70
12838 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12839 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12840 ; RV64ZVE32F-NEXT: add a2, a0, a2
12841 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12842 ; RV64ZVE32F-NEXT: li a3, 32
12843 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
12844 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12845 ; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, ma
12846 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
12847 ; RV64ZVE32F-NEXT: slli a2, a1, 38
12848 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_38
12849 ; RV64ZVE32F-NEXT: j .LBB98_39
12850 ; RV64ZVE32F-NEXT: .LBB98_62: # %cond.load76
12851 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12852 ; RV64ZVE32F-NEXT: add a2, a0, a2
12853 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12854 ; RV64ZVE32F-NEXT: li a3, 32
12855 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12856 ; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma
12857 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26
12858 ; RV64ZVE32F-NEXT: slli a2, a1, 36
12859 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_41
12860 ; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load79
12861 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12862 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
12863 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8
12864 ; RV64ZVE32F-NEXT: add a2, a0, a2
12865 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12866 ; RV64ZVE32F-NEXT: li a3, 32
12867 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12868 ; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, ma
12869 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
12870 ; RV64ZVE32F-NEXT: slli a2, a1, 35
12871 ; RV64ZVE32F-NEXT: bgez a2, .LBB98_42
12872 ; RV64ZVE32F-NEXT: .LBB98_64: # %cond.load82
12873 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
12874 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9
12875 ; RV64ZVE32F-NEXT: add a2, a0, a2
12876 ; RV64ZVE32F-NEXT: lbu a2, 0(a2)
12877 ; RV64ZVE32F-NEXT: li a3, 32
12878 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, ta, ma
12879 ; RV64ZVE32F-NEXT: vmv.s.x v12, a2
12880 ; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, ma
12881 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
12882 ; RV64ZVE32F-NEXT: slli a2, a1, 34
12883 ; RV64ZVE32F-NEXT: bltz a2, .LBB98_43
12884 ; RV64ZVE32F-NEXT: j .LBB98_44
12885 %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
12886 %v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
12891 define <4 x i32> @mgather_broadcast_load_unmasked(ptr %base) {
12892 ; CHECK-LABEL: mgather_broadcast_load_unmasked:
12894 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
12895 ; CHECK-NEXT: vlse32.v v8, (a0), zero
12897 %head = insertelement <4 x i1> poison, i1 true, i32 0
12898 %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
12899 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
12900 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
12904 ; Same as the previous test, but uses an explicit splat instead of a splat-via-gep.
12905 define <4 x i32> @mgather_broadcast_load_unmasked2(ptr %base) {
12906 ; CHECK-LABEL: mgather_broadcast_load_unmasked2:
12908 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
12909 ; CHECK-NEXT: vlse32.v v8, (a0), zero
12911 %head = insertelement <4 x i1> poison, i1 true, i32 0
12912 %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
12913 %ptrhead = insertelement <4 x ptr> poison, ptr %base, i32 0
12914 %ptrs = shufflevector <4 x ptr> %ptrhead, <4 x ptr> poison, <4 x i32> zeroinitializer
12915 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
12919 define <4 x i32> @mgather_broadcast_load_masked(ptr %base, <4 x i1> %m) {
12920 ; CHECK-LABEL: mgather_broadcast_load_masked:
12922 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
12923 ; CHECK-NEXT: vlse32.v v8, (a0), zero, v0.t
12925 %head = insertelement <4 x i1> poison, i1 true, i32 0
12926 %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
12927 %ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
12928 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> poison)
12932 define <4 x i32> @mgather_unit_stride_load(ptr %base) {
12933 ; CHECK-LABEL: mgather_unit_stride_load:
12935 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
12936 ; CHECK-NEXT: vle32.v v8, (a0)
12938 %head = insertelement <4 x i1> poison, i1 true, i32 0
12939 %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
12940 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
12941 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
12945 define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
12946 ; CHECK-LABEL: mgather_unit_stride_load_with_offset:
12948 ; CHECK-NEXT: addi a0, a0, 16
12949 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
12950 ; CHECK-NEXT: vle32.v v8, (a0)
12952 %head = insertelement <4 x i1> poison, i1 true, i32 0
12953 %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
12954 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
12955 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
12959 define <4 x i32> @mgather_unit_stride_load_narrow_idx(ptr %base) {
12960 ; CHECK-LABEL: mgather_unit_stride_load_narrow_idx:
12962 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
12963 ; CHECK-NEXT: vle32.v v8, (a0)
12965 %head = insertelement <4 x i1> poison, i1 true, i32 0
12966 %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
12967 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 1, i8 2, i8 3>
12968 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
12972 define <4 x i32> @mgather_unit_stride_load_wide_idx(ptr %base) {
12973 ; CHECK-LABEL: mgather_unit_stride_load_wide_idx:
12975 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
12976 ; CHECK-NEXT: vle32.v v8, (a0)
12978 %head = insertelement <4 x i1> poison, i1 true, i32 0
12979 %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
12980 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i128> <i128 0, i128 1, i128 2, i128 3>
12981 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
12985 ; This looks like a strided load (at i8), but isn't at the index type: i8 128 sign-extends to -128, so the addresses alternate between %base and %base - 512 rather than advancing by a fixed stride.
12986 define <4 x i32> @mgather_narrow_edge_case(ptr %base) {
12987 ; RV32-LABEL: mgather_narrow_edge_case:
12989 ; RV32-NEXT: li a1, -512
12990 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
12991 ; RV32-NEXT: vmv.v.i v0, 5
12992 ; RV32-NEXT: vmv.v.x v8, a1
12993 ; RV32-NEXT: vmerge.vim v8, v8, 0, v0
12994 ; RV32-NEXT: vluxei32.v v8, (a0), v8
12997 ; RV64V-LABEL: mgather_narrow_edge_case:
12999 ; RV64V-NEXT: li a1, -512
13000 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
13001 ; RV64V-NEXT: vmv.v.x v8, a1
13002 ; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
13003 ; RV64V-NEXT: vmv.v.i v0, 5
13004 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
13005 ; RV64V-NEXT: vmerge.vim v10, v8, 0, v0
13006 ; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
13007 ; RV64V-NEXT: vluxei64.v v8, (a0), v10
13010 ; RV64ZVE32F-LABEL: mgather_narrow_edge_case:
13011 ; RV64ZVE32F: # %bb.0:
13012 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
13013 ; RV64ZVE32F-NEXT: vmset.m v8
13014 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
13015 ; RV64ZVE32F-NEXT: # implicit-def: $v8
13016 ; RV64ZVE32F-NEXT: beqz zero, .LBB106_5
13017 ; RV64ZVE32F-NEXT: # %bb.1: # %else
13018 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13019 ; RV64ZVE32F-NEXT: bnez a2, .LBB106_6
13020 ; RV64ZVE32F-NEXT: .LBB106_2: # %else2
13021 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13022 ; RV64ZVE32F-NEXT: bnez a2, .LBB106_7
13023 ; RV64ZVE32F-NEXT: .LBB106_3: # %else5
13024 ; RV64ZVE32F-NEXT: andi a1, a1, 8
13025 ; RV64ZVE32F-NEXT: bnez a1, .LBB106_8
13026 ; RV64ZVE32F-NEXT: .LBB106_4: # %else8
13027 ; RV64ZVE32F-NEXT: ret
13028 ; RV64ZVE32F-NEXT: .LBB106_5: # %cond.load
13029 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13030 ; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
13031 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13032 ; RV64ZVE32F-NEXT: beqz a2, .LBB106_2
13033 ; RV64ZVE32F-NEXT: .LBB106_6: # %cond.load1
13034 ; RV64ZVE32F-NEXT: lw a2, -512(a0)
13035 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
13036 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13037 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
13038 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13039 ; RV64ZVE32F-NEXT: beqz a2, .LBB106_3
13040 ; RV64ZVE32F-NEXT: .LBB106_7: # %cond.load4
13041 ; RV64ZVE32F-NEXT: lw a2, 0(a0)
13042 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
13043 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13044 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
13045 ; RV64ZVE32F-NEXT: andi a1, a1, 8
13046 ; RV64ZVE32F-NEXT: beqz a1, .LBB106_4
13047 ; RV64ZVE32F-NEXT: .LBB106_8: # %cond.load7
13048 ; RV64ZVE32F-NEXT: lw a0, -512(a0)
13049 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13050 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
13051 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
13052 ; RV64ZVE32F-NEXT: ret
13053 %head = insertelement <4 x i1> poison, i1 true, i32 0
13054 %allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
13055 %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> <i8 0, i8 128, i8 0, i8 128>
13056 %v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
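; The gather below is only byte-aligned (align 1), so every configuration scalarizes it and assembles each i16 element from two byte loads.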
13060 define <8 x i16> @mgather_strided_unaligned(ptr %base) {
13061 ; RV32V-LABEL: mgather_strided_unaligned:
13063 ; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
13064 ; RV32V-NEXT: vmset.m v8
13065 ; RV32V-NEXT: vid.v v10
13066 ; RV32V-NEXT: vsll.vi v10, v10, 2
13067 ; RV32V-NEXT: vadd.vx v10, v10, a0
13068 ; RV32V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
13069 ; RV32V-NEXT: vmv.x.s a0, v8
13070 ; RV32V-NEXT: # implicit-def: $v8
13071 ; RV32V-NEXT: beqz zero, .LBB107_9
13072 ; RV32V-NEXT: # %bb.1: # %else
13073 ; RV32V-NEXT: andi a1, a0, 2
13074 ; RV32V-NEXT: bnez a1, .LBB107_10
13075 ; RV32V-NEXT: .LBB107_2: # %else2
13076 ; RV32V-NEXT: andi a1, a0, 4
13077 ; RV32V-NEXT: bnez a1, .LBB107_11
13078 ; RV32V-NEXT: .LBB107_3: # %else5
13079 ; RV32V-NEXT: andi a1, a0, 8
13080 ; RV32V-NEXT: bnez a1, .LBB107_12
13081 ; RV32V-NEXT: .LBB107_4: # %else8
13082 ; RV32V-NEXT: andi a1, a0, 16
13083 ; RV32V-NEXT: bnez a1, .LBB107_13
13084 ; RV32V-NEXT: .LBB107_5: # %else11
13085 ; RV32V-NEXT: andi a1, a0, 32
13086 ; RV32V-NEXT: bnez a1, .LBB107_14
13087 ; RV32V-NEXT: .LBB107_6: # %else14
13088 ; RV32V-NEXT: andi a1, a0, 64
13089 ; RV32V-NEXT: bnez a1, .LBB107_15
13090 ; RV32V-NEXT: .LBB107_7: # %else17
13091 ; RV32V-NEXT: andi a0, a0, -128
13092 ; RV32V-NEXT: bnez a0, .LBB107_16
13093 ; RV32V-NEXT: .LBB107_8: # %else20
13095 ; RV32V-NEXT: .LBB107_9: # %cond.load
13096 ; RV32V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13097 ; RV32V-NEXT: vmv.x.s a1, v10
13098 ; RV32V-NEXT: lbu a2, 1(a1)
13099 ; RV32V-NEXT: lbu a1, 0(a1)
13100 ; RV32V-NEXT: slli a2, a2, 8
13101 ; RV32V-NEXT: or a1, a2, a1
13102 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13103 ; RV32V-NEXT: vmv.v.x v8, a1
13104 ; RV32V-NEXT: andi a1, a0, 2
13105 ; RV32V-NEXT: beqz a1, .LBB107_2
13106 ; RV32V-NEXT: .LBB107_10: # %cond.load1
13107 ; RV32V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13108 ; RV32V-NEXT: vslidedown.vi v9, v10, 1
13109 ; RV32V-NEXT: vmv.x.s a1, v9
13110 ; RV32V-NEXT: lbu a2, 1(a1)
13111 ; RV32V-NEXT: lbu a1, 0(a1)
13112 ; RV32V-NEXT: slli a2, a2, 8
13113 ; RV32V-NEXT: or a1, a2, a1
13114 ; RV32V-NEXT: vmv.s.x v9, a1
13115 ; RV32V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
13116 ; RV32V-NEXT: vslideup.vi v8, v9, 1
13117 ; RV32V-NEXT: andi a1, a0, 4
13118 ; RV32V-NEXT: beqz a1, .LBB107_3
13119 ; RV32V-NEXT: .LBB107_11: # %cond.load4
13120 ; RV32V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13121 ; RV32V-NEXT: vslidedown.vi v9, v10, 2
13122 ; RV32V-NEXT: vmv.x.s a1, v9
13123 ; RV32V-NEXT: lbu a2, 1(a1)
13124 ; RV32V-NEXT: lbu a1, 0(a1)
13125 ; RV32V-NEXT: slli a2, a2, 8
13126 ; RV32V-NEXT: or a1, a2, a1
13127 ; RV32V-NEXT: vmv.s.x v9, a1
13128 ; RV32V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
13129 ; RV32V-NEXT: vslideup.vi v8, v9, 2
13130 ; RV32V-NEXT: andi a1, a0, 8
13131 ; RV32V-NEXT: beqz a1, .LBB107_4
13132 ; RV32V-NEXT: .LBB107_12: # %cond.load7
13133 ; RV32V-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13134 ; RV32V-NEXT: vslidedown.vi v9, v10, 3
13135 ; RV32V-NEXT: vmv.x.s a1, v9
13136 ; RV32V-NEXT: lbu a2, 1(a1)
13137 ; RV32V-NEXT: lbu a1, 0(a1)
13138 ; RV32V-NEXT: slli a2, a2, 8
13139 ; RV32V-NEXT: or a1, a2, a1
13140 ; RV32V-NEXT: vmv.s.x v9, a1
13141 ; RV32V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
13142 ; RV32V-NEXT: vslideup.vi v8, v9, 3
13143 ; RV32V-NEXT: andi a1, a0, 16
13144 ; RV32V-NEXT: beqz a1, .LBB107_5
13145 ; RV32V-NEXT: .LBB107_13: # %cond.load10
13146 ; RV32V-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13147 ; RV32V-NEXT: vslidedown.vi v12, v10, 4
13148 ; RV32V-NEXT: vmv.x.s a1, v12
13149 ; RV32V-NEXT: lbu a2, 1(a1)
13150 ; RV32V-NEXT: lbu a1, 0(a1)
13151 ; RV32V-NEXT: slli a2, a2, 8
13152 ; RV32V-NEXT: or a1, a2, a1
13153 ; RV32V-NEXT: vmv.s.x v9, a1
13154 ; RV32V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
13155 ; RV32V-NEXT: vslideup.vi v8, v9, 4
13156 ; RV32V-NEXT: andi a1, a0, 32
13157 ; RV32V-NEXT: beqz a1, .LBB107_6
13158 ; RV32V-NEXT: .LBB107_14: # %cond.load13
13159 ; RV32V-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13160 ; RV32V-NEXT: vslidedown.vi v12, v10, 5
13161 ; RV32V-NEXT: vmv.x.s a1, v12
13162 ; RV32V-NEXT: lbu a2, 1(a1)
13163 ; RV32V-NEXT: lbu a1, 0(a1)
13164 ; RV32V-NEXT: slli a2, a2, 8
13165 ; RV32V-NEXT: or a1, a2, a1
13166 ; RV32V-NEXT: vmv.s.x v9, a1
13167 ; RV32V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
13168 ; RV32V-NEXT: vslideup.vi v8, v9, 5
13169 ; RV32V-NEXT: andi a1, a0, 64
13170 ; RV32V-NEXT: beqz a1, .LBB107_7
13171 ; RV32V-NEXT: .LBB107_15: # %cond.load16
13172 ; RV32V-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13173 ; RV32V-NEXT: vslidedown.vi v12, v10, 6
13174 ; RV32V-NEXT: vmv.x.s a1, v12
13175 ; RV32V-NEXT: lbu a2, 1(a1)
13176 ; RV32V-NEXT: lbu a1, 0(a1)
13177 ; RV32V-NEXT: slli a2, a2, 8
13178 ; RV32V-NEXT: or a1, a2, a1
13179 ; RV32V-NEXT: vmv.s.x v9, a1
13180 ; RV32V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
13181 ; RV32V-NEXT: vslideup.vi v8, v9, 6
13182 ; RV32V-NEXT: andi a0, a0, -128
13183 ; RV32V-NEXT: beqz a0, .LBB107_8
13184 ; RV32V-NEXT: .LBB107_16: # %cond.load19
13185 ; RV32V-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13186 ; RV32V-NEXT: vslidedown.vi v10, v10, 7
13187 ; RV32V-NEXT: vmv.x.s a0, v10
13188 ; RV32V-NEXT: lbu a1, 1(a0)
13189 ; RV32V-NEXT: lbu a0, 0(a0)
13190 ; RV32V-NEXT: slli a1, a1, 8
13191 ; RV32V-NEXT: or a0, a1, a0
13192 ; RV32V-NEXT: vmv.s.x v9, a0
13193 ; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13194 ; RV32V-NEXT: vslideup.vi v8, v9, 7
13197 ; RV64V-LABEL: mgather_strided_unaligned:
13199 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
13200 ; RV64V-NEXT: vmset.m v8
13201 ; RV64V-NEXT: vid.v v12
13202 ; RV64V-NEXT: vsll.vi v12, v12, 2
13203 ; RV64V-NEXT: vadd.vx v12, v12, a0
13204 ; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
13205 ; RV64V-NEXT: vmv.x.s a0, v8
13206 ; RV64V-NEXT: # implicit-def: $v8
13207 ; RV64V-NEXT: beqz zero, .LBB107_11
13208 ; RV64V-NEXT: # %bb.1: # %else
13209 ; RV64V-NEXT: andi a1, a0, 2
13210 ; RV64V-NEXT: bnez a1, .LBB107_12
13211 ; RV64V-NEXT: .LBB107_2: # %else2
13212 ; RV64V-NEXT: andi a1, a0, 4
13213 ; RV64V-NEXT: bnez a1, .LBB107_13
13214 ; RV64V-NEXT: .LBB107_3: # %else5
13215 ; RV64V-NEXT: andi a1, a0, 8
13216 ; RV64V-NEXT: beqz a1, .LBB107_5
13217 ; RV64V-NEXT: .LBB107_4: # %cond.load7
13218 ; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
13219 ; RV64V-NEXT: vslidedown.vi v10, v12, 3
13220 ; RV64V-NEXT: vmv.x.s a1, v10
13221 ; RV64V-NEXT: lbu a2, 1(a1)
13222 ; RV64V-NEXT: lbu a1, 0(a1)
13223 ; RV64V-NEXT: slli a2, a2, 8
13224 ; RV64V-NEXT: or a1, a2, a1
13225 ; RV64V-NEXT: vmv.s.x v9, a1
13226 ; RV64V-NEXT: vsetivli zero, 4, e16, m1, tu, ma
13227 ; RV64V-NEXT: vslideup.vi v8, v9, 3
13228 ; RV64V-NEXT: .LBB107_5: # %else8
13229 ; RV64V-NEXT: addi sp, sp, -320
13230 ; RV64V-NEXT: .cfi_def_cfa_offset 320
13231 ; RV64V-NEXT: sd ra, 312(sp) # 8-byte Folded Spill
13232 ; RV64V-NEXT: sd s0, 304(sp) # 8-byte Folded Spill
13233 ; RV64V-NEXT: .cfi_offset ra, -8
13234 ; RV64V-NEXT: .cfi_offset s0, -16
13235 ; RV64V-NEXT: addi s0, sp, 320
13236 ; RV64V-NEXT: .cfi_def_cfa s0, 0
13237 ; RV64V-NEXT: andi sp, sp, -64
13238 ; RV64V-NEXT: andi a1, a0, 16
13239 ; RV64V-NEXT: bnez a1, .LBB107_14
13240 ; RV64V-NEXT: # %bb.6: # %else11
13241 ; RV64V-NEXT: andi a1, a0, 32
13242 ; RV64V-NEXT: bnez a1, .LBB107_15
13243 ; RV64V-NEXT: .LBB107_7: # %else14
13244 ; RV64V-NEXT: andi a1, a0, 64
13245 ; RV64V-NEXT: bnez a1, .LBB107_16
13246 ; RV64V-NEXT: .LBB107_8: # %else17
13247 ; RV64V-NEXT: andi a0, a0, -128
13248 ; RV64V-NEXT: beqz a0, .LBB107_10
13249 ; RV64V-NEXT: .LBB107_9: # %cond.load19
13250 ; RV64V-NEXT: mv a0, sp
13251 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
13252 ; RV64V-NEXT: vse64.v v12, (a0)
13253 ; RV64V-NEXT: ld a0, 56(sp)
13254 ; RV64V-NEXT: lbu a1, 1(a0)
13255 ; RV64V-NEXT: lbu a0, 0(a0)
13256 ; RV64V-NEXT: slli a1, a1, 8
13257 ; RV64V-NEXT: or a0, a1, a0
13258 ; RV64V-NEXT: vmv.s.x v9, a0
13259 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
13260 ; RV64V-NEXT: vslideup.vi v8, v9, 7
13261 ; RV64V-NEXT: .LBB107_10: # %else20
13262 ; RV64V-NEXT: addi sp, s0, -320
13263 ; RV64V-NEXT: ld ra, 312(sp) # 8-byte Folded Reload
13264 ; RV64V-NEXT: ld s0, 304(sp) # 8-byte Folded Reload
13265 ; RV64V-NEXT: addi sp, sp, 320
13267 ; RV64V-NEXT: .LBB107_11: # %cond.load
13268 ; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
13269 ; RV64V-NEXT: vmv.x.s a1, v12
13270 ; RV64V-NEXT: lbu a2, 1(a1)
13271 ; RV64V-NEXT: lbu a1, 0(a1)
13272 ; RV64V-NEXT: slli a2, a2, 8
13273 ; RV64V-NEXT: or a1, a2, a1
13274 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13275 ; RV64V-NEXT: vmv.v.x v8, a1
13276 ; RV64V-NEXT: andi a1, a0, 2
13277 ; RV64V-NEXT: beqz a1, .LBB107_2
13278 ; RV64V-NEXT: .LBB107_12: # %cond.load1
13279 ; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
13280 ; RV64V-NEXT: vslidedown.vi v9, v12, 1
13281 ; RV64V-NEXT: vmv.x.s a1, v9
13282 ; RV64V-NEXT: lbu a2, 1(a1)
13283 ; RV64V-NEXT: lbu a1, 0(a1)
13284 ; RV64V-NEXT: slli a2, a2, 8
13285 ; RV64V-NEXT: or a1, a2, a1
13286 ; RV64V-NEXT: vmv.s.x v9, a1
13287 ; RV64V-NEXT: vsetivli zero, 2, e16, m1, tu, ma
13288 ; RV64V-NEXT: vslideup.vi v8, v9, 1
13289 ; RV64V-NEXT: andi a1, a0, 4
13290 ; RV64V-NEXT: beqz a1, .LBB107_3
13291 ; RV64V-NEXT: .LBB107_13: # %cond.load4
13292 ; RV64V-NEXT: vsetivli zero, 1, e64, m2, ta, ma
13293 ; RV64V-NEXT: vslidedown.vi v10, v12, 2
13294 ; RV64V-NEXT: vmv.x.s a1, v10
13295 ; RV64V-NEXT: lbu a2, 1(a1)
13296 ; RV64V-NEXT: lbu a1, 0(a1)
13297 ; RV64V-NEXT: slli a2, a2, 8
13298 ; RV64V-NEXT: or a1, a2, a1
13299 ; RV64V-NEXT: vmv.s.x v9, a1
13300 ; RV64V-NEXT: vsetivli zero, 3, e16, m1, tu, ma
13301 ; RV64V-NEXT: vslideup.vi v8, v9, 2
13302 ; RV64V-NEXT: andi a1, a0, 8
13303 ; RV64V-NEXT: bnez a1, .LBB107_4
13304 ; RV64V-NEXT: j .LBB107_5
13305 ; RV64V-NEXT: .LBB107_14: # %cond.load10
13306 ; RV64V-NEXT: addi a1, sp, 192
13307 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
13308 ; RV64V-NEXT: vse64.v v12, (a1)
13309 ; RV64V-NEXT: ld a1, 224(sp)
13310 ; RV64V-NEXT: lbu a2, 1(a1)
13311 ; RV64V-NEXT: lbu a1, 0(a1)
13312 ; RV64V-NEXT: slli a2, a2, 8
13313 ; RV64V-NEXT: or a1, a2, a1
13314 ; RV64V-NEXT: vmv.s.x v9, a1
13315 ; RV64V-NEXT: vsetivli zero, 5, e16, m1, tu, ma
13316 ; RV64V-NEXT: vslideup.vi v8, v9, 4
13317 ; RV64V-NEXT: andi a1, a0, 32
13318 ; RV64V-NEXT: beqz a1, .LBB107_7
13319 ; RV64V-NEXT: .LBB107_15: # %cond.load13
13320 ; RV64V-NEXT: addi a1, sp, 128
13321 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
13322 ; RV64V-NEXT: vse64.v v12, (a1)
13323 ; RV64V-NEXT: ld a1, 168(sp)
13324 ; RV64V-NEXT: lbu a2, 1(a1)
13325 ; RV64V-NEXT: lbu a1, 0(a1)
13326 ; RV64V-NEXT: slli a2, a2, 8
13327 ; RV64V-NEXT: or a1, a2, a1
13328 ; RV64V-NEXT: vmv.s.x v9, a1
13329 ; RV64V-NEXT: vsetivli zero, 6, e16, m1, tu, ma
13330 ; RV64V-NEXT: vslideup.vi v8, v9, 5
13331 ; RV64V-NEXT: andi a1, a0, 64
13332 ; RV64V-NEXT: beqz a1, .LBB107_8
13333 ; RV64V-NEXT: .LBB107_16: # %cond.load16
13334 ; RV64V-NEXT: addi a1, sp, 64
13335 ; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
13336 ; RV64V-NEXT: vse64.v v12, (a1)
13337 ; RV64V-NEXT: ld a1, 112(sp)
13338 ; RV64V-NEXT: lbu a2, 1(a1)
13339 ; RV64V-NEXT: lbu a1, 0(a1)
13340 ; RV64V-NEXT: slli a2, a2, 8
13341 ; RV64V-NEXT: or a1, a2, a1
13342 ; RV64V-NEXT: vmv.s.x v9, a1
13343 ; RV64V-NEXT: vsetivli zero, 7, e16, m1, tu, ma
13344 ; RV64V-NEXT: vslideup.vi v8, v9, 6
13345 ; RV64V-NEXT: andi a0, a0, -128
13346 ; RV64V-NEXT: bnez a0, .LBB107_9
13347 ; RV64V-NEXT: j .LBB107_10
13349 ; RV32ZVE32F-LABEL: mgather_strided_unaligned:
13350 ; RV32ZVE32F: # %bb.0:
13351 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
13352 ; RV32ZVE32F-NEXT: vmset.m v8
13353 ; RV32ZVE32F-NEXT: vid.v v10
13354 ; RV32ZVE32F-NEXT: vsll.vi v10, v10, 2
13355 ; RV32ZVE32F-NEXT: vadd.vx v10, v10, a0
13356 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
13357 ; RV32ZVE32F-NEXT: vmv.x.s a0, v8
13358 ; RV32ZVE32F-NEXT: # implicit-def: $v8
13359 ; RV32ZVE32F-NEXT: beqz zero, .LBB107_9
13360 ; RV32ZVE32F-NEXT: # %bb.1: # %else
13361 ; RV32ZVE32F-NEXT: andi a1, a0, 2
13362 ; RV32ZVE32F-NEXT: bnez a1, .LBB107_10
13363 ; RV32ZVE32F-NEXT: .LBB107_2: # %else2
13364 ; RV32ZVE32F-NEXT: andi a1, a0, 4
13365 ; RV32ZVE32F-NEXT: bnez a1, .LBB107_11
13366 ; RV32ZVE32F-NEXT: .LBB107_3: # %else5
13367 ; RV32ZVE32F-NEXT: andi a1, a0, 8
13368 ; RV32ZVE32F-NEXT: bnez a1, .LBB107_12
13369 ; RV32ZVE32F-NEXT: .LBB107_4: # %else8
13370 ; RV32ZVE32F-NEXT: andi a1, a0, 16
13371 ; RV32ZVE32F-NEXT: bnez a1, .LBB107_13
13372 ; RV32ZVE32F-NEXT: .LBB107_5: # %else11
13373 ; RV32ZVE32F-NEXT: andi a1, a0, 32
13374 ; RV32ZVE32F-NEXT: bnez a1, .LBB107_14
13375 ; RV32ZVE32F-NEXT: .LBB107_6: # %else14
13376 ; RV32ZVE32F-NEXT: andi a1, a0, 64
13377 ; RV32ZVE32F-NEXT: bnez a1, .LBB107_15
13378 ; RV32ZVE32F-NEXT: .LBB107_7: # %else17
13379 ; RV32ZVE32F-NEXT: andi a0, a0, -128
13380 ; RV32ZVE32F-NEXT: bnez a0, .LBB107_16
13381 ; RV32ZVE32F-NEXT: .LBB107_8: # %else20
13382 ; RV32ZVE32F-NEXT: ret
13383 ; RV32ZVE32F-NEXT: .LBB107_9: # %cond.load
13384 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
13385 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10
13386 ; RV32ZVE32F-NEXT: lbu a2, 1(a1)
13387 ; RV32ZVE32F-NEXT: lbu a1, 0(a1)
13388 ; RV32ZVE32F-NEXT: slli a2, a2, 8
13389 ; RV32ZVE32F-NEXT: or a1, a2, a1
13390 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13391 ; RV32ZVE32F-NEXT: vmv.v.x v8, a1
13392 ; RV32ZVE32F-NEXT: andi a1, a0, 2
13393 ; RV32ZVE32F-NEXT: beqz a1, .LBB107_2
13394 ; RV32ZVE32F-NEXT: .LBB107_10: # %cond.load1
13395 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13396 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v10, 1
13397 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
13398 ; RV32ZVE32F-NEXT: lbu a2, 1(a1)
13399 ; RV32ZVE32F-NEXT: lbu a1, 0(a1)
13400 ; RV32ZVE32F-NEXT: slli a2, a2, 8
13401 ; RV32ZVE32F-NEXT: or a1, a2, a1
13402 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
13403 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
13404 ; RV32ZVE32F-NEXT: vslideup.vi v8, v9, 1
13405 ; RV32ZVE32F-NEXT: andi a1, a0, 4
13406 ; RV32ZVE32F-NEXT: beqz a1, .LBB107_3
13407 ; RV32ZVE32F-NEXT: .LBB107_11: # %cond.load4
13408 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13409 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v10, 2
13410 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
13411 ; RV32ZVE32F-NEXT: lbu a2, 1(a1)
13412 ; RV32ZVE32F-NEXT: lbu a1, 0(a1)
13413 ; RV32ZVE32F-NEXT: slli a2, a2, 8
13414 ; RV32ZVE32F-NEXT: or a1, a2, a1
13415 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
13416 ; RV32ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
13417 ; RV32ZVE32F-NEXT: vslideup.vi v8, v9, 2
13418 ; RV32ZVE32F-NEXT: andi a1, a0, 8
13419 ; RV32ZVE32F-NEXT: beqz a1, .LBB107_4
13420 ; RV32ZVE32F-NEXT: .LBB107_12: # %cond.load7
13421 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
13422 ; RV32ZVE32F-NEXT: vslidedown.vi v9, v10, 3
13423 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9
13424 ; RV32ZVE32F-NEXT: lbu a2, 1(a1)
13425 ; RV32ZVE32F-NEXT: lbu a1, 0(a1)
13426 ; RV32ZVE32F-NEXT: slli a2, a2, 8
13427 ; RV32ZVE32F-NEXT: or a1, a2, a1
13428 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
13429 ; RV32ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
13430 ; RV32ZVE32F-NEXT: vslideup.vi v8, v9, 3
13431 ; RV32ZVE32F-NEXT: andi a1, a0, 16
13432 ; RV32ZVE32F-NEXT: beqz a1, .LBB107_5
13433 ; RV32ZVE32F-NEXT: .LBB107_13: # %cond.load10
13434 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13435 ; RV32ZVE32F-NEXT: vslidedown.vi v12, v10, 4
13436 ; RV32ZVE32F-NEXT: vmv.x.s a1, v12
13437 ; RV32ZVE32F-NEXT: lbu a2, 1(a1)
13438 ; RV32ZVE32F-NEXT: lbu a1, 0(a1)
13439 ; RV32ZVE32F-NEXT: slli a2, a2, 8
13440 ; RV32ZVE32F-NEXT: or a1, a2, a1
13441 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
13442 ; RV32ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
13443 ; RV32ZVE32F-NEXT: vslideup.vi v8, v9, 4
13444 ; RV32ZVE32F-NEXT: andi a1, a0, 32
13445 ; RV32ZVE32F-NEXT: beqz a1, .LBB107_6
13446 ; RV32ZVE32F-NEXT: .LBB107_14: # %cond.load13
13447 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13448 ; RV32ZVE32F-NEXT: vslidedown.vi v12, v10, 5
13449 ; RV32ZVE32F-NEXT: vmv.x.s a1, v12
13450 ; RV32ZVE32F-NEXT: lbu a2, 1(a1)
13451 ; RV32ZVE32F-NEXT: lbu a1, 0(a1)
13452 ; RV32ZVE32F-NEXT: slli a2, a2, 8
13453 ; RV32ZVE32F-NEXT: or a1, a2, a1
13454 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
13455 ; RV32ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
13456 ; RV32ZVE32F-NEXT: vslideup.vi v8, v9, 5
13457 ; RV32ZVE32F-NEXT: andi a1, a0, 64
13458 ; RV32ZVE32F-NEXT: beqz a1, .LBB107_7
13459 ; RV32ZVE32F-NEXT: .LBB107_15: # %cond.load16
13460 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13461 ; RV32ZVE32F-NEXT: vslidedown.vi v12, v10, 6
13462 ; RV32ZVE32F-NEXT: vmv.x.s a1, v12
13463 ; RV32ZVE32F-NEXT: lbu a2, 1(a1)
13464 ; RV32ZVE32F-NEXT: lbu a1, 0(a1)
13465 ; RV32ZVE32F-NEXT: slli a2, a2, 8
13466 ; RV32ZVE32F-NEXT: or a1, a2, a1
13467 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
13468 ; RV32ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
13469 ; RV32ZVE32F-NEXT: vslideup.vi v8, v9, 6
13470 ; RV32ZVE32F-NEXT: andi a0, a0, -128
13471 ; RV32ZVE32F-NEXT: beqz a0, .LBB107_8
13472 ; RV32ZVE32F-NEXT: .LBB107_16: # %cond.load19
13473 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
13474 ; RV32ZVE32F-NEXT: vslidedown.vi v10, v10, 7
13475 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10
13476 ; RV32ZVE32F-NEXT: lbu a1, 1(a0)
13477 ; RV32ZVE32F-NEXT: lbu a0, 0(a0)
13478 ; RV32ZVE32F-NEXT: slli a1, a1, 8
13479 ; RV32ZVE32F-NEXT: or a0, a1, a0
13480 ; RV32ZVE32F-NEXT: vmv.s.x v9, a0
13481 ; RV32ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13482 ; RV32ZVE32F-NEXT: vslideup.vi v8, v9, 7
13483 ; RV32ZVE32F-NEXT: ret
13485 ; RV64ZVE32F-LABEL: mgather_strided_unaligned:
13486 ; RV64ZVE32F: # %bb.0:
13487 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
13488 ; RV64ZVE32F-NEXT: vmset.m v8
13489 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
13490 ; RV64ZVE32F-NEXT: # implicit-def: $v8
13491 ; RV64ZVE32F-NEXT: beqz zero, .LBB107_9
13492 ; RV64ZVE32F-NEXT: # %bb.1: # %else
13493 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13494 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_10
13495 ; RV64ZVE32F-NEXT: .LBB107_2: # %else2
13496 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13497 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_11
13498 ; RV64ZVE32F-NEXT: .LBB107_3: # %else5
13499 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13500 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_12
13501 ; RV64ZVE32F-NEXT: .LBB107_4: # %else8
13502 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13503 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_13
13504 ; RV64ZVE32F-NEXT: .LBB107_5: # %else11
13505 ; RV64ZVE32F-NEXT: andi a2, a1, 32
13506 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_14
13507 ; RV64ZVE32F-NEXT: .LBB107_6: # %else14
13508 ; RV64ZVE32F-NEXT: andi a2, a1, 64
13509 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_15
13510 ; RV64ZVE32F-NEXT: .LBB107_7: # %else17
13511 ; RV64ZVE32F-NEXT: andi a1, a1, -128
13512 ; RV64ZVE32F-NEXT: bnez a1, .LBB107_16
13513 ; RV64ZVE32F-NEXT: .LBB107_8: # %else20
13514 ; RV64ZVE32F-NEXT: ret
13515 ; RV64ZVE32F-NEXT: .LBB107_9: # %cond.load
13516 ; RV64ZVE32F-NEXT: lbu a2, 1(a0)
13517 ; RV64ZVE32F-NEXT: lbu a3, 0(a0)
13518 ; RV64ZVE32F-NEXT: slli a2, a2, 8
13519 ; RV64ZVE32F-NEXT: or a2, a2, a3
13520 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
13521 ; RV64ZVE32F-NEXT: vmv.v.x v8, a2
13522 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13523 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_2
13524 ; RV64ZVE32F-NEXT: .LBB107_10: # %cond.load1
13525 ; RV64ZVE32F-NEXT: lbu a2, 5(a0)
13526 ; RV64ZVE32F-NEXT: lbu a3, 4(a0)
13527 ; RV64ZVE32F-NEXT: slli a2, a2, 8
13528 ; RV64ZVE32F-NEXT: or a2, a2, a3
13529 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
13530 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13531 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
13532 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
13533 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13534 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_3
13535 ; RV64ZVE32F-NEXT: .LBB107_11: # %cond.load4
13536 ; RV64ZVE32F-NEXT: lbu a2, 9(a0)
13537 ; RV64ZVE32F-NEXT: lbu a3, 8(a0)
13538 ; RV64ZVE32F-NEXT: slli a2, a2, 8
13539 ; RV64ZVE32F-NEXT: or a2, a2, a3
13540 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
13541 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13542 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
13543 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13544 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_4
13545 ; RV64ZVE32F-NEXT: .LBB107_12: # %cond.load7
13546 ; RV64ZVE32F-NEXT: lbu a2, 13(a0)
13547 ; RV64ZVE32F-NEXT: lbu a3, 12(a0)
13548 ; RV64ZVE32F-NEXT: slli a2, a2, 8
13549 ; RV64ZVE32F-NEXT: or a2, a2, a3
13550 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
13551 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13552 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
13553 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13554 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_5
13555 ; RV64ZVE32F-NEXT: .LBB107_13: # %cond.load10
13556 ; RV64ZVE32F-NEXT: lbu a2, 17(a0)
13557 ; RV64ZVE32F-NEXT: lbu a3, 16(a0)
13558 ; RV64ZVE32F-NEXT: slli a2, a2, 8
13559 ; RV64ZVE32F-NEXT: or a2, a2, a3
13560 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
13561 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13562 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
13563 ; RV64ZVE32F-NEXT: andi a2, a1, 32
13564 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_6
13565 ; RV64ZVE32F-NEXT: .LBB107_14: # %cond.load13
13566 ; RV64ZVE32F-NEXT: lbu a2, 21(a0)
13567 ; RV64ZVE32F-NEXT: lbu a3, 20(a0)
13568 ; RV64ZVE32F-NEXT: slli a2, a2, 8
13569 ; RV64ZVE32F-NEXT: or a2, a2, a3
13570 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
13571 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13572 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
13573 ; RV64ZVE32F-NEXT: andi a2, a1, 64
13574 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_7
13575 ; RV64ZVE32F-NEXT: .LBB107_15: # %cond.load16
13576 ; RV64ZVE32F-NEXT: lbu a2, 25(a0)
13577 ; RV64ZVE32F-NEXT: lbu a3, 24(a0)
13578 ; RV64ZVE32F-NEXT: slli a2, a2, 8
13579 ; RV64ZVE32F-NEXT: or a2, a2, a3
13580 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
13581 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13582 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
13583 ; RV64ZVE32F-NEXT: andi a1, a1, -128
13584 ; RV64ZVE32F-NEXT: beqz a1, .LBB107_8
13585 ; RV64ZVE32F-NEXT: .LBB107_16: # %cond.load19
13586 ; RV64ZVE32F-NEXT: lbu a1, 29(a0)
13587 ; RV64ZVE32F-NEXT: lbu a0, 28(a0)
13588 ; RV64ZVE32F-NEXT: slli a1, a1, 8
13589 ; RV64ZVE32F-NEXT: or a0, a1, a0
13590 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13591 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
13592 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
13593 ; RV64ZVE32F-NEXT: ret
13594 %head = insertelement <8 x i1> poison, i1 true, i16 0
13595 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
13596 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
13597 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %allones, <8 x i16> poison)
13601 ; TODO: Recognize this as a strided load with SEW=32.
13602 define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
13603 ; RV32-LABEL: mgather_strided_2xSEW:
13605 ; RV32-NEXT: li a1, 8
13606 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13607 ; RV32-NEXT: vlse32.v v8, (a0), a1
13610 ; RV64V-LABEL: mgather_strided_2xSEW:
13612 ; RV64V-NEXT: li a1, 8
13613 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13614 ; RV64V-NEXT: vlse32.v v8, (a0), a1
13617 ; RV64ZVE32F-LABEL: mgather_strided_2xSEW:
13618 ; RV64ZVE32F: # %bb.0:
13619 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
13620 ; RV64ZVE32F-NEXT: vmset.m v8
13621 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
13622 ; RV64ZVE32F-NEXT: # implicit-def: $v8
13623 ; RV64ZVE32F-NEXT: beqz zero, .LBB108_9
13624 ; RV64ZVE32F-NEXT: # %bb.1: # %else
13625 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13626 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_10
13627 ; RV64ZVE32F-NEXT: .LBB108_2: # %else2
13628 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13629 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_11
13630 ; RV64ZVE32F-NEXT: .LBB108_3: # %else5
13631 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13632 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_12
13633 ; RV64ZVE32F-NEXT: .LBB108_4: # %else8
13634 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13635 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_13
13636 ; RV64ZVE32F-NEXT: .LBB108_5: # %else11
13637 ; RV64ZVE32F-NEXT: andi a2, a1, 32
13638 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_14
13639 ; RV64ZVE32F-NEXT: .LBB108_6: # %else14
13640 ; RV64ZVE32F-NEXT: andi a2, a1, 64
13641 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_15
13642 ; RV64ZVE32F-NEXT: .LBB108_7: # %else17
13643 ; RV64ZVE32F-NEXT: andi a1, a1, -128
13644 ; RV64ZVE32F-NEXT: bnez a1, .LBB108_16
13645 ; RV64ZVE32F-NEXT: .LBB108_8: # %else20
13646 ; RV64ZVE32F-NEXT: ret
13647 ; RV64ZVE32F-NEXT: .LBB108_9: # %cond.load
13648 ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
13649 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13650 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_2
13651 ; RV64ZVE32F-NEXT: .LBB108_10: # %cond.load1
13652 ; RV64ZVE32F-NEXT: lh a2, 2(a0)
13653 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
13654 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13655 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
13656 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
13657 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13658 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_3
13659 ; RV64ZVE32F-NEXT: .LBB108_11: # %cond.load4
13660 ; RV64ZVE32F-NEXT: lh a2, 8(a0)
13661 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
13662 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13663 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
13664 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13665 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_4
13666 ; RV64ZVE32F-NEXT: .LBB108_12: # %cond.load7
13667 ; RV64ZVE32F-NEXT: lh a2, 10(a0)
13668 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
13669 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13670 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
13671 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13672 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_5
13673 ; RV64ZVE32F-NEXT: .LBB108_13: # %cond.load10
13674 ; RV64ZVE32F-NEXT: lh a2, 16(a0)
13675 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
13676 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13677 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
13678 ; RV64ZVE32F-NEXT: andi a2, a1, 32
13679 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_6
13680 ; RV64ZVE32F-NEXT: .LBB108_14: # %cond.load13
13681 ; RV64ZVE32F-NEXT: lh a2, 18(a0)
13682 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
13683 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13684 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
13685 ; RV64ZVE32F-NEXT: andi a2, a1, 64
13686 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_7
13687 ; RV64ZVE32F-NEXT: .LBB108_15: # %cond.load16
13688 ; RV64ZVE32F-NEXT: lh a2, 24(a0)
13689 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
13690 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13691 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
13692 ; RV64ZVE32F-NEXT: andi a1, a1, -128
13693 ; RV64ZVE32F-NEXT: beqz a1, .LBB108_8
13694 ; RV64ZVE32F-NEXT: .LBB108_16: # %cond.load19
13695 ; RV64ZVE32F-NEXT: lh a0, 26(a0)
13696 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13697 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
13698 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
13699 ; RV64ZVE32F-NEXT: ret
13700 %head = insertelement <8 x i1> poison, i1 true, i16 0
13701 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
13702 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
13703 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
13707 ; TODO: Recognize this as a strided load with SEW=32.
13708 define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) {
13709 ; RV32-LABEL: mgather_strided_2xSEW_with_offset:
13711 ; RV32-NEXT: addi a0, a0, 4
13712 ; RV32-NEXT: li a1, 8
13713 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13714 ; RV32-NEXT: vlse32.v v8, (a0), a1
13717 ; RV64V-LABEL: mgather_strided_2xSEW_with_offset:
13719 ; RV64V-NEXT: addi a0, a0, 4
13720 ; RV64V-NEXT: li a1, 8
13721 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13722 ; RV64V-NEXT: vlse32.v v8, (a0), a1
13725 ; RV64ZVE32F-LABEL: mgather_strided_2xSEW_with_offset:
13726 ; RV64ZVE32F: # %bb.0:
13727 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
13728 ; RV64ZVE32F-NEXT: vmset.m v8
13729 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
13730 ; RV64ZVE32F-NEXT: # implicit-def: $v8
13731 ; RV64ZVE32F-NEXT: beqz zero, .LBB109_9
13732 ; RV64ZVE32F-NEXT: # %bb.1: # %else
13733 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13734 ; RV64ZVE32F-NEXT: bnez a2, .LBB109_10
13735 ; RV64ZVE32F-NEXT: .LBB109_2: # %else2
13736 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13737 ; RV64ZVE32F-NEXT: bnez a2, .LBB109_11
13738 ; RV64ZVE32F-NEXT: .LBB109_3: # %else5
13739 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13740 ; RV64ZVE32F-NEXT: bnez a2, .LBB109_12
13741 ; RV64ZVE32F-NEXT: .LBB109_4: # %else8
13742 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13743 ; RV64ZVE32F-NEXT: bnez a2, .LBB109_13
13744 ; RV64ZVE32F-NEXT: .LBB109_5: # %else11
13745 ; RV64ZVE32F-NEXT: andi a2, a1, 32
13746 ; RV64ZVE32F-NEXT: bnez a2, .LBB109_14
13747 ; RV64ZVE32F-NEXT: .LBB109_6: # %else14
13748 ; RV64ZVE32F-NEXT: andi a2, a1, 64
13749 ; RV64ZVE32F-NEXT: bnez a2, .LBB109_15
13750 ; RV64ZVE32F-NEXT: .LBB109_7: # %else17
13751 ; RV64ZVE32F-NEXT: andi a1, a1, -128
13752 ; RV64ZVE32F-NEXT: bnez a1, .LBB109_16
13753 ; RV64ZVE32F-NEXT: .LBB109_8: # %else20
13754 ; RV64ZVE32F-NEXT: ret
13755 ; RV64ZVE32F-NEXT: .LBB109_9: # %cond.load
13756 ; RV64ZVE32F-NEXT: addi a2, a0, 4
13757 ; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero
13758 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13759 ; RV64ZVE32F-NEXT: beqz a2, .LBB109_2
13760 ; RV64ZVE32F-NEXT: .LBB109_10: # %cond.load1
13761 ; RV64ZVE32F-NEXT: lh a2, 6(a0)
13762 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
13763 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13764 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
13765 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
13766 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13767 ; RV64ZVE32F-NEXT: beqz a2, .LBB109_3
13768 ; RV64ZVE32F-NEXT: .LBB109_11: # %cond.load4
13769 ; RV64ZVE32F-NEXT: lh a2, 12(a0)
13770 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
13771 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13772 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
13773 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13774 ; RV64ZVE32F-NEXT: beqz a2, .LBB109_4
13775 ; RV64ZVE32F-NEXT: .LBB109_12: # %cond.load7
13776 ; RV64ZVE32F-NEXT: lh a2, 14(a0)
13777 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
13778 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13779 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
13780 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13781 ; RV64ZVE32F-NEXT: beqz a2, .LBB109_5
13782 ; RV64ZVE32F-NEXT: .LBB109_13: # %cond.load10
13783 ; RV64ZVE32F-NEXT: lh a2, 20(a0)
13784 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
13785 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13786 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
13787 ; RV64ZVE32F-NEXT: andi a2, a1, 32
13788 ; RV64ZVE32F-NEXT: beqz a2, .LBB109_6
13789 ; RV64ZVE32F-NEXT: .LBB109_14: # %cond.load13
13790 ; RV64ZVE32F-NEXT: lh a2, 22(a0)
13791 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
13792 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13793 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
13794 ; RV64ZVE32F-NEXT: andi a2, a1, 64
13795 ; RV64ZVE32F-NEXT: beqz a2, .LBB109_7
13796 ; RV64ZVE32F-NEXT: .LBB109_15: # %cond.load16
13797 ; RV64ZVE32F-NEXT: lh a2, 28(a0)
13798 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
13799 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13800 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
13801 ; RV64ZVE32F-NEXT: andi a1, a1, -128
13802 ; RV64ZVE32F-NEXT: beqz a1, .LBB109_8
13803 ; RV64ZVE32F-NEXT: .LBB109_16: # %cond.load19
13804 ; RV64ZVE32F-NEXT: lh a0, 30(a0)
13805 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13806 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
13807 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
13808 ; RV64ZVE32F-NEXT: ret
13809 %head = insertelement <8 x i1> poison, i1 true, i16 0
13810 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
13811 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 2, i64 3, i64 6, i64 7, i64 10, i64 11, i64 14, i64 15>
13812 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
13816 ; TODO: Recognize as strided load with SEW=32
13817 define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) {
13818 ; RV32-LABEL: mgather_reverse_unit_strided_2xSEW:
13820 ; RV32-NEXT: addi a0, a0, 28
13821 ; RV32-NEXT: li a1, -4
13822 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13823 ; RV32-NEXT: vlse32.v v8, (a0), a1
13826 ; RV64V-LABEL: mgather_reverse_unit_strided_2xSEW:
13828 ; RV64V-NEXT: addi a0, a0, 28
13829 ; RV64V-NEXT: li a1, -4
13830 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13831 ; RV64V-NEXT: vlse32.v v8, (a0), a1
13834 ; RV64ZVE32F-LABEL: mgather_reverse_unit_strided_2xSEW:
13835 ; RV64ZVE32F: # %bb.0:
13836 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
13837 ; RV64ZVE32F-NEXT: vmset.m v8
13838 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
13839 ; RV64ZVE32F-NEXT: # implicit-def: $v8
13840 ; RV64ZVE32F-NEXT: beqz zero, .LBB110_9
13841 ; RV64ZVE32F-NEXT: # %bb.1: # %else
13842 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13843 ; RV64ZVE32F-NEXT: bnez a2, .LBB110_10
13844 ; RV64ZVE32F-NEXT: .LBB110_2: # %else2
13845 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13846 ; RV64ZVE32F-NEXT: bnez a2, .LBB110_11
13847 ; RV64ZVE32F-NEXT: .LBB110_3: # %else5
13848 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13849 ; RV64ZVE32F-NEXT: bnez a2, .LBB110_12
13850 ; RV64ZVE32F-NEXT: .LBB110_4: # %else8
13851 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13852 ; RV64ZVE32F-NEXT: bnez a2, .LBB110_13
13853 ; RV64ZVE32F-NEXT: .LBB110_5: # %else11
13854 ; RV64ZVE32F-NEXT: andi a2, a1, 32
13855 ; RV64ZVE32F-NEXT: bnez a2, .LBB110_14
13856 ; RV64ZVE32F-NEXT: .LBB110_6: # %else14
13857 ; RV64ZVE32F-NEXT: andi a2, a1, 64
13858 ; RV64ZVE32F-NEXT: bnez a2, .LBB110_15
13859 ; RV64ZVE32F-NEXT: .LBB110_7: # %else17
13860 ; RV64ZVE32F-NEXT: andi a1, a1, -128
13861 ; RV64ZVE32F-NEXT: bnez a1, .LBB110_16
13862 ; RV64ZVE32F-NEXT: .LBB110_8: # %else20
13863 ; RV64ZVE32F-NEXT: ret
13864 ; RV64ZVE32F-NEXT: .LBB110_9: # %cond.load
13865 ; RV64ZVE32F-NEXT: addi a2, a0, 28
13866 ; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero
13867 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13868 ; RV64ZVE32F-NEXT: beqz a2, .LBB110_2
13869 ; RV64ZVE32F-NEXT: .LBB110_10: # %cond.load1
13870 ; RV64ZVE32F-NEXT: lh a2, 30(a0)
13871 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
13872 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13873 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
13874 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
13875 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13876 ; RV64ZVE32F-NEXT: beqz a2, .LBB110_3
13877 ; RV64ZVE32F-NEXT: .LBB110_11: # %cond.load4
13878 ; RV64ZVE32F-NEXT: lh a2, 24(a0)
13879 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
13880 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13881 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
13882 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13883 ; RV64ZVE32F-NEXT: beqz a2, .LBB110_4
13884 ; RV64ZVE32F-NEXT: .LBB110_12: # %cond.load7
13885 ; RV64ZVE32F-NEXT: lh a2, 26(a0)
13886 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
13887 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13888 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
13889 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13890 ; RV64ZVE32F-NEXT: beqz a2, .LBB110_5
13891 ; RV64ZVE32F-NEXT: .LBB110_13: # %cond.load10
13892 ; RV64ZVE32F-NEXT: lh a2, 20(a0)
13893 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
13894 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13895 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
13896 ; RV64ZVE32F-NEXT: andi a2, a1, 32
13897 ; RV64ZVE32F-NEXT: beqz a2, .LBB110_6
13898 ; RV64ZVE32F-NEXT: .LBB110_14: # %cond.load13
13899 ; RV64ZVE32F-NEXT: lh a2, 22(a0)
13900 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
13901 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13902 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
13903 ; RV64ZVE32F-NEXT: andi a2, a1, 64
13904 ; RV64ZVE32F-NEXT: beqz a2, .LBB110_7
13905 ; RV64ZVE32F-NEXT: .LBB110_15: # %cond.load16
13906 ; RV64ZVE32F-NEXT: lh a2, 16(a0)
13907 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
13908 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13909 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
13910 ; RV64ZVE32F-NEXT: andi a1, a1, -128
13911 ; RV64ZVE32F-NEXT: beqz a1, .LBB110_8
13912 ; RV64ZVE32F-NEXT: .LBB110_16: # %cond.load19
13913 ; RV64ZVE32F-NEXT: lh a0, 18(a0)
13914 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
13915 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
13916 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
13917 ; RV64ZVE32F-NEXT: ret
13918 %head = insertelement <8 x i1> poison, i1 true, i16 0
13919 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
13920 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 12, i64 13, i64 10, i64 11, i64 8, i64 9>
13921 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
13925 ; TODO: Recognize as strided load with SEW=32
13926 define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) {
13927 ; RV32-LABEL: mgather_reverse_strided_2xSEW:
13929 ; RV32-NEXT: addi a0, a0, 28
13930 ; RV32-NEXT: li a1, -8
13931 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13932 ; RV32-NEXT: vlse32.v v8, (a0), a1
13935 ; RV64V-LABEL: mgather_reverse_strided_2xSEW:
13937 ; RV64V-NEXT: addi a0, a0, 28
13938 ; RV64V-NEXT: li a1, -8
13939 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
13940 ; RV64V-NEXT: vlse32.v v8, (a0), a1
13943 ; RV64ZVE32F-LABEL: mgather_reverse_strided_2xSEW:
13944 ; RV64ZVE32F: # %bb.0:
13945 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
13946 ; RV64ZVE32F-NEXT: vmset.m v8
13947 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
13948 ; RV64ZVE32F-NEXT: # implicit-def: $v8
13949 ; RV64ZVE32F-NEXT: beqz zero, .LBB111_9
13950 ; RV64ZVE32F-NEXT: # %bb.1: # %else
13951 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13952 ; RV64ZVE32F-NEXT: bnez a2, .LBB111_10
13953 ; RV64ZVE32F-NEXT: .LBB111_2: # %else2
13954 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13955 ; RV64ZVE32F-NEXT: bnez a2, .LBB111_11
13956 ; RV64ZVE32F-NEXT: .LBB111_3: # %else5
13957 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13958 ; RV64ZVE32F-NEXT: bnez a2, .LBB111_12
13959 ; RV64ZVE32F-NEXT: .LBB111_4: # %else8
13960 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13961 ; RV64ZVE32F-NEXT: bnez a2, .LBB111_13
13962 ; RV64ZVE32F-NEXT: .LBB111_5: # %else11
13963 ; RV64ZVE32F-NEXT: andi a2, a1, 32
13964 ; RV64ZVE32F-NEXT: bnez a2, .LBB111_14
13965 ; RV64ZVE32F-NEXT: .LBB111_6: # %else14
13966 ; RV64ZVE32F-NEXT: andi a2, a1, 64
13967 ; RV64ZVE32F-NEXT: bnez a2, .LBB111_15
13968 ; RV64ZVE32F-NEXT: .LBB111_7: # %else17
13969 ; RV64ZVE32F-NEXT: andi a1, a1, -128
13970 ; RV64ZVE32F-NEXT: bnez a1, .LBB111_16
13971 ; RV64ZVE32F-NEXT: .LBB111_8: # %else20
13972 ; RV64ZVE32F-NEXT: ret
13973 ; RV64ZVE32F-NEXT: .LBB111_9: # %cond.load
13974 ; RV64ZVE32F-NEXT: addi a2, a0, 28
13975 ; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero
13976 ; RV64ZVE32F-NEXT: andi a2, a1, 2
13977 ; RV64ZVE32F-NEXT: beqz a2, .LBB111_2
13978 ; RV64ZVE32F-NEXT: .LBB111_10: # %cond.load1
13979 ; RV64ZVE32F-NEXT: lh a2, 30(a0)
13980 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
13981 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13982 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
13983 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
13984 ; RV64ZVE32F-NEXT: andi a2, a1, 4
13985 ; RV64ZVE32F-NEXT: beqz a2, .LBB111_3
13986 ; RV64ZVE32F-NEXT: .LBB111_11: # %cond.load4
13987 ; RV64ZVE32F-NEXT: lh a2, 20(a0)
13988 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
13989 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13990 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
13991 ; RV64ZVE32F-NEXT: andi a2, a1, 8
13992 ; RV64ZVE32F-NEXT: beqz a2, .LBB111_4
13993 ; RV64ZVE32F-NEXT: .LBB111_12: # %cond.load7
13994 ; RV64ZVE32F-NEXT: lh a2, 22(a0)
13995 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
13996 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
13997 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
13998 ; RV64ZVE32F-NEXT: andi a2, a1, 16
13999 ; RV64ZVE32F-NEXT: beqz a2, .LBB111_5
14000 ; RV64ZVE32F-NEXT: .LBB111_13: # %cond.load10
14001 ; RV64ZVE32F-NEXT: lh a2, 12(a0)
14002 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
14003 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14004 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
14005 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14006 ; RV64ZVE32F-NEXT: beqz a2, .LBB111_6
14007 ; RV64ZVE32F-NEXT: .LBB111_14: # %cond.load13
14008 ; RV64ZVE32F-NEXT: lh a2, 14(a0)
14009 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
14010 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14011 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
14012 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14013 ; RV64ZVE32F-NEXT: beqz a2, .LBB111_7
14014 ; RV64ZVE32F-NEXT: .LBB111_15: # %cond.load16
14015 ; RV64ZVE32F-NEXT: lh a2, 4(a0)
14016 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
14017 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14018 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
14019 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14020 ; RV64ZVE32F-NEXT: beqz a1, .LBB111_8
14021 ; RV64ZVE32F-NEXT: .LBB111_16: # %cond.load19
14022 ; RV64ZVE32F-NEXT: lh a0, 6(a0)
14023 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14024 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
14025 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
14026 ; RV64ZVE32F-NEXT: ret
14027 %head = insertelement <8 x i1> poison, i1 true, i16 0
14028 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
14029 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 14, i64 15, i64 10, i64 11, i64 6, i64 7, i64 2, i64 3>
14030 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
14034 define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
14035 ; RV32-LABEL: mgather_gather_2xSEW:
14037 ; RV32-NEXT: lui a1, 16513
14038 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14039 ; RV32-NEXT: vmv.s.x v9, a1
14040 ; RV32-NEXT: vluxei8.v v8, (a0), v9
14043 ; RV64V-LABEL: mgather_gather_2xSEW:
14045 ; RV64V-NEXT: lui a1, 16513
14046 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14047 ; RV64V-NEXT: vmv.s.x v9, a1
14048 ; RV64V-NEXT: vluxei8.v v8, (a0), v9
14051 ; RV64ZVE32F-LABEL: mgather_gather_2xSEW:
14052 ; RV64ZVE32F: # %bb.0:
14053 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
14054 ; RV64ZVE32F-NEXT: vmset.m v8
14055 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
14056 ; RV64ZVE32F-NEXT: # implicit-def: $v8
14057 ; RV64ZVE32F-NEXT: beqz zero, .LBB112_9
14058 ; RV64ZVE32F-NEXT: # %bb.1: # %else
14059 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14060 ; RV64ZVE32F-NEXT: bnez a2, .LBB112_10
14061 ; RV64ZVE32F-NEXT: .LBB112_2: # %else2
14062 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14063 ; RV64ZVE32F-NEXT: bnez a2, .LBB112_11
14064 ; RV64ZVE32F-NEXT: .LBB112_3: # %else5
14065 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14066 ; RV64ZVE32F-NEXT: bnez a2, .LBB112_12
14067 ; RV64ZVE32F-NEXT: .LBB112_4: # %else8
14068 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14069 ; RV64ZVE32F-NEXT: bnez a2, .LBB112_13
14070 ; RV64ZVE32F-NEXT: .LBB112_5: # %else11
14071 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14072 ; RV64ZVE32F-NEXT: bnez a2, .LBB112_14
14073 ; RV64ZVE32F-NEXT: .LBB112_6: # %else14
14074 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14075 ; RV64ZVE32F-NEXT: bnez a2, .LBB112_15
14076 ; RV64ZVE32F-NEXT: .LBB112_7: # %else17
14077 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14078 ; RV64ZVE32F-NEXT: bnez a1, .LBB112_16
14079 ; RV64ZVE32F-NEXT: .LBB112_8: # %else20
14080 ; RV64ZVE32F-NEXT: ret
14081 ; RV64ZVE32F-NEXT: .LBB112_9: # %cond.load
14082 ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
14083 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14084 ; RV64ZVE32F-NEXT: beqz a2, .LBB112_2
14085 ; RV64ZVE32F-NEXT: .LBB112_10: # %cond.load1
14086 ; RV64ZVE32F-NEXT: lh a2, 2(a0)
14087 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
14088 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14089 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
14090 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
14091 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14092 ; RV64ZVE32F-NEXT: beqz a2, .LBB112_3
14093 ; RV64ZVE32F-NEXT: .LBB112_11: # %cond.load4
14094 ; RV64ZVE32F-NEXT: lh a2, 16(a0)
14095 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
14096 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14097 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
14098 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14099 ; RV64ZVE32F-NEXT: beqz a2, .LBB112_4
14100 ; RV64ZVE32F-NEXT: .LBB112_12: # %cond.load7
14101 ; RV64ZVE32F-NEXT: lh a2, 18(a0)
14102 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
14103 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14104 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
14105 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14106 ; RV64ZVE32F-NEXT: beqz a2, .LBB112_5
14107 ; RV64ZVE32F-NEXT: .LBB112_13: # %cond.load10
14108 ; RV64ZVE32F-NEXT: lh a2, 8(a0)
14109 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
14110 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14111 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
14112 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14113 ; RV64ZVE32F-NEXT: beqz a2, .LBB112_6
14114 ; RV64ZVE32F-NEXT: .LBB112_14: # %cond.load13
14115 ; RV64ZVE32F-NEXT: lh a2, 10(a0)
14116 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
14117 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14118 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
14119 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14120 ; RV64ZVE32F-NEXT: beqz a2, .LBB112_7
14121 ; RV64ZVE32F-NEXT: .LBB112_15: # %cond.load16
14122 ; RV64ZVE32F-NEXT: lh a2, 4(a0)
14123 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
14124 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14125 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
14126 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14127 ; RV64ZVE32F-NEXT: beqz a1, .LBB112_8
14128 ; RV64ZVE32F-NEXT: .LBB112_16: # %cond.load19
14129 ; RV64ZVE32F-NEXT: lh a0, 6(a0)
14130 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14131 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
14132 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
14133 ; RV64ZVE32F-NEXT: ret
14134 %head = insertelement <8 x i1> poison, i1 true, i16 0
14135 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
14136 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 2, i32 3>
14137 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
14141 ; Base pointer isn't sufficiently aligned to form gather with e32
14142 define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) {
14143 ; RV32-LABEL: mgather_gather_2xSEW_unaligned:
14145 ; RV32-NEXT: lui a1, %hi(.LCPI113_0)
14146 ; RV32-NEXT: addi a1, a1, %lo(.LCPI113_0)
14147 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14148 ; RV32-NEXT: vle8.v v9, (a1)
14149 ; RV32-NEXT: vluxei8.v v8, (a0), v9
14152 ; RV64V-LABEL: mgather_gather_2xSEW_unaligned:
14154 ; RV64V-NEXT: lui a1, %hi(.LCPI113_0)
14155 ; RV64V-NEXT: addi a1, a1, %lo(.LCPI113_0)
14156 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14157 ; RV64V-NEXT: vle8.v v9, (a1)
14158 ; RV64V-NEXT: vluxei8.v v8, (a0), v9
14161 ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned:
14162 ; RV64ZVE32F: # %bb.0:
14163 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
14164 ; RV64ZVE32F-NEXT: vmset.m v8
14165 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
14166 ; RV64ZVE32F-NEXT: # implicit-def: $v8
14167 ; RV64ZVE32F-NEXT: beqz zero, .LBB113_9
14168 ; RV64ZVE32F-NEXT: # %bb.1: # %else
14169 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14170 ; RV64ZVE32F-NEXT: bnez a2, .LBB113_10
14171 ; RV64ZVE32F-NEXT: .LBB113_2: # %else2
14172 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14173 ; RV64ZVE32F-NEXT: bnez a2, .LBB113_11
14174 ; RV64ZVE32F-NEXT: .LBB113_3: # %else5
14175 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14176 ; RV64ZVE32F-NEXT: bnez a2, .LBB113_12
14177 ; RV64ZVE32F-NEXT: .LBB113_4: # %else8
14178 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14179 ; RV64ZVE32F-NEXT: bnez a2, .LBB113_13
14180 ; RV64ZVE32F-NEXT: .LBB113_5: # %else11
14181 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14182 ; RV64ZVE32F-NEXT: bnez a2, .LBB113_14
14183 ; RV64ZVE32F-NEXT: .LBB113_6: # %else14
14184 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14185 ; RV64ZVE32F-NEXT: bnez a2, .LBB113_15
14186 ; RV64ZVE32F-NEXT: .LBB113_7: # %else17
14187 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14188 ; RV64ZVE32F-NEXT: bnez a1, .LBB113_16
14189 ; RV64ZVE32F-NEXT: .LBB113_8: # %else20
14190 ; RV64ZVE32F-NEXT: ret
14191 ; RV64ZVE32F-NEXT: .LBB113_9: # %cond.load
14192 ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
14193 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14194 ; RV64ZVE32F-NEXT: beqz a2, .LBB113_2
14195 ; RV64ZVE32F-NEXT: .LBB113_10: # %cond.load1
14196 ; RV64ZVE32F-NEXT: lh a2, 2(a0)
14197 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
14198 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14199 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
14200 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
14201 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14202 ; RV64ZVE32F-NEXT: beqz a2, .LBB113_3
14203 ; RV64ZVE32F-NEXT: .LBB113_11: # %cond.load4
14204 ; RV64ZVE32F-NEXT: lh a2, 18(a0)
14205 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
14206 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14207 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
14208 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14209 ; RV64ZVE32F-NEXT: beqz a2, .LBB113_4
14210 ; RV64ZVE32F-NEXT: .LBB113_12: # %cond.load7
14211 ; RV64ZVE32F-NEXT: lh a2, 20(a0)
14212 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
14213 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14214 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
14215 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14216 ; RV64ZVE32F-NEXT: beqz a2, .LBB113_5
14217 ; RV64ZVE32F-NEXT: .LBB113_13: # %cond.load10
14218 ; RV64ZVE32F-NEXT: lh a2, 8(a0)
14219 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
14220 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14221 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
14222 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14223 ; RV64ZVE32F-NEXT: beqz a2, .LBB113_6
14224 ; RV64ZVE32F-NEXT: .LBB113_14: # %cond.load13
14225 ; RV64ZVE32F-NEXT: lh a2, 10(a0)
14226 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
14227 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14228 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
14229 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14230 ; RV64ZVE32F-NEXT: beqz a2, .LBB113_7
14231 ; RV64ZVE32F-NEXT: .LBB113_15: # %cond.load16
14232 ; RV64ZVE32F-NEXT: lh a2, 4(a0)
14233 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
14234 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14235 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
14236 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14237 ; RV64ZVE32F-NEXT: beqz a1, .LBB113_8
14238 ; RV64ZVE32F-NEXT: .LBB113_16: # %cond.load19
14239 ; RV64ZVE32F-NEXT: lh a0, 6(a0)
14240 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14241 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
14242 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
14243 ; RV64ZVE32F-NEXT: ret
14244 %head = insertelement <8 x i1> poison, i1 true, i16 0
14245 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
14246 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
14247 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %allones, <8 x i16> poison)
14251 ; Despite sufficient starting alignment, the index values aren't properly
14253 define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) {
14254 ; RV32-LABEL: mgather_gather_2xSEW_unaligned2:
14256 ; RV32-NEXT: lui a1, %hi(.LCPI114_0)
14257 ; RV32-NEXT: addi a1, a1, %lo(.LCPI114_0)
14258 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14259 ; RV32-NEXT: vle8.v v9, (a1)
14260 ; RV32-NEXT: vluxei8.v v8, (a0), v9
14263 ; RV64V-LABEL: mgather_gather_2xSEW_unaligned2:
14265 ; RV64V-NEXT: lui a1, %hi(.LCPI114_0)
14266 ; RV64V-NEXT: addi a1, a1, %lo(.LCPI114_0)
14267 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14268 ; RV64V-NEXT: vle8.v v9, (a1)
14269 ; RV64V-NEXT: vluxei8.v v8, (a0), v9
14272 ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2:
14273 ; RV64ZVE32F: # %bb.0:
14274 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
14275 ; RV64ZVE32F-NEXT: vmset.m v8
14276 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
14277 ; RV64ZVE32F-NEXT: # implicit-def: $v8
14278 ; RV64ZVE32F-NEXT: beqz zero, .LBB114_9
14279 ; RV64ZVE32F-NEXT: # %bb.1: # %else
14280 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14281 ; RV64ZVE32F-NEXT: bnez a2, .LBB114_10
14282 ; RV64ZVE32F-NEXT: .LBB114_2: # %else2
14283 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14284 ; RV64ZVE32F-NEXT: bnez a2, .LBB114_11
14285 ; RV64ZVE32F-NEXT: .LBB114_3: # %else5
14286 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14287 ; RV64ZVE32F-NEXT: bnez a2, .LBB114_12
14288 ; RV64ZVE32F-NEXT: .LBB114_4: # %else8
14289 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14290 ; RV64ZVE32F-NEXT: bnez a2, .LBB114_13
14291 ; RV64ZVE32F-NEXT: .LBB114_5: # %else11
14292 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14293 ; RV64ZVE32F-NEXT: bnez a2, .LBB114_14
14294 ; RV64ZVE32F-NEXT: .LBB114_6: # %else14
14295 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14296 ; RV64ZVE32F-NEXT: bnez a2, .LBB114_15
14297 ; RV64ZVE32F-NEXT: .LBB114_7: # %else17
14298 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14299 ; RV64ZVE32F-NEXT: bnez a1, .LBB114_16
14300 ; RV64ZVE32F-NEXT: .LBB114_8: # %else20
14301 ; RV64ZVE32F-NEXT: ret
14302 ; RV64ZVE32F-NEXT: .LBB114_9: # %cond.load
14303 ; RV64ZVE32F-NEXT: addi a2, a0, 2
14304 ; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero
14305 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14306 ; RV64ZVE32F-NEXT: beqz a2, .LBB114_2
14307 ; RV64ZVE32F-NEXT: .LBB114_10: # %cond.load1
14308 ; RV64ZVE32F-NEXT: lh a2, 4(a0)
14309 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
14310 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14311 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
14312 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
14313 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14314 ; RV64ZVE32F-NEXT: beqz a2, .LBB114_3
14315 ; RV64ZVE32F-NEXT: .LBB114_11: # %cond.load4
14316 ; RV64ZVE32F-NEXT: lh a2, 18(a0)
14317 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
14318 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14319 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
14320 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14321 ; RV64ZVE32F-NEXT: beqz a2, .LBB114_4
14322 ; RV64ZVE32F-NEXT: .LBB114_12: # %cond.load7
14323 ; RV64ZVE32F-NEXT: lh a2, 20(a0)
14324 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
14325 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14326 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
14327 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14328 ; RV64ZVE32F-NEXT: beqz a2, .LBB114_5
14329 ; RV64ZVE32F-NEXT: .LBB114_13: # %cond.load10
14330 ; RV64ZVE32F-NEXT: lh a2, 8(a0)
14331 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
14332 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14333 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
14334 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14335 ; RV64ZVE32F-NEXT: beqz a2, .LBB114_6
14336 ; RV64ZVE32F-NEXT: .LBB114_14: # %cond.load13
14337 ; RV64ZVE32F-NEXT: lh a2, 10(a0)
14338 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
14339 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14340 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
14341 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14342 ; RV64ZVE32F-NEXT: beqz a2, .LBB114_7
14343 ; RV64ZVE32F-NEXT: .LBB114_15: # %cond.load16
14344 ; RV64ZVE32F-NEXT: lh a2, 4(a0)
14345 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
14346 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14347 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
14348 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14349 ; RV64ZVE32F-NEXT: beqz a1, .LBB114_8
14350 ; RV64ZVE32F-NEXT: .LBB114_16: # %cond.load19
14351 ; RV64ZVE32F-NEXT: lh a0, 6(a0)
14352 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14353 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
14354 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
14355 ; RV64ZVE32F-NEXT: ret
14356 %head = insertelement <8 x i1> poison, i1 true, i16 0
14357 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
14358 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 4, i32 5, i32 2, i32 3>
14359 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
14363 define <8 x i16> @mgather_gather_4xSEW(ptr %base) {
14364 ; RV32V-LABEL: mgather_gather_4xSEW:
14366 ; RV32V-NEXT: li a1, 16
14367 ; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
14368 ; RV32V-NEXT: vlse64.v v8, (a0), a1
14371 ; RV64V-LABEL: mgather_gather_4xSEW:
14373 ; RV64V-NEXT: li a1, 16
14374 ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma
14375 ; RV64V-NEXT: vlse64.v v8, (a0), a1
14378 ; RV32ZVE32F-LABEL: mgather_gather_4xSEW:
14379 ; RV32ZVE32F: # %bb.0:
14380 ; RV32ZVE32F-NEXT: lui a1, 82176
14381 ; RV32ZVE32F-NEXT: addi a1, a1, 1024
14382 ; RV32ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14383 ; RV32ZVE32F-NEXT: vmv.s.x v9, a1
14384 ; RV32ZVE32F-NEXT: vluxei8.v v8, (a0), v9
14385 ; RV32ZVE32F-NEXT: ret
14387 ; RV64ZVE32F-LABEL: mgather_gather_4xSEW:
14388 ; RV64ZVE32F: # %bb.0:
14389 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
14390 ; RV64ZVE32F-NEXT: vmset.m v8
14391 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
14392 ; RV64ZVE32F-NEXT: # implicit-def: $v8
14393 ; RV64ZVE32F-NEXT: beqz zero, .LBB115_9
14394 ; RV64ZVE32F-NEXT: # %bb.1: # %else
14395 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14396 ; RV64ZVE32F-NEXT: bnez a2, .LBB115_10
14397 ; RV64ZVE32F-NEXT: .LBB115_2: # %else2
14398 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14399 ; RV64ZVE32F-NEXT: bnez a2, .LBB115_11
14400 ; RV64ZVE32F-NEXT: .LBB115_3: # %else5
14401 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14402 ; RV64ZVE32F-NEXT: bnez a2, .LBB115_12
14403 ; RV64ZVE32F-NEXT: .LBB115_4: # %else8
14404 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14405 ; RV64ZVE32F-NEXT: bnez a2, .LBB115_13
14406 ; RV64ZVE32F-NEXT: .LBB115_5: # %else11
14407 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14408 ; RV64ZVE32F-NEXT: bnez a2, .LBB115_14
14409 ; RV64ZVE32F-NEXT: .LBB115_6: # %else14
14410 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14411 ; RV64ZVE32F-NEXT: bnez a2, .LBB115_15
14412 ; RV64ZVE32F-NEXT: .LBB115_7: # %else17
14413 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14414 ; RV64ZVE32F-NEXT: bnez a1, .LBB115_16
14415 ; RV64ZVE32F-NEXT: .LBB115_8: # %else20
14416 ; RV64ZVE32F-NEXT: ret
14417 ; RV64ZVE32F-NEXT: .LBB115_9: # %cond.load
14418 ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
14419 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14420 ; RV64ZVE32F-NEXT: beqz a2, .LBB115_2
14421 ; RV64ZVE32F-NEXT: .LBB115_10: # %cond.load1
14422 ; RV64ZVE32F-NEXT: lh a2, 2(a0)
14423 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
14424 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14425 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
14426 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
14427 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14428 ; RV64ZVE32F-NEXT: beqz a2, .LBB115_3
14429 ; RV64ZVE32F-NEXT: .LBB115_11: # %cond.load4
14430 ; RV64ZVE32F-NEXT: lh a2, 4(a0)
14431 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
14432 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14433 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
14434 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14435 ; RV64ZVE32F-NEXT: beqz a2, .LBB115_4
14436 ; RV64ZVE32F-NEXT: .LBB115_12: # %cond.load7
14437 ; RV64ZVE32F-NEXT: lh a2, 6(a0)
14438 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
14439 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14440 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
14441 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14442 ; RV64ZVE32F-NEXT: beqz a2, .LBB115_5
14443 ; RV64ZVE32F-NEXT: .LBB115_13: # %cond.load10
14444 ; RV64ZVE32F-NEXT: lh a2, 16(a0)
14445 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
14446 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14447 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
14448 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14449 ; RV64ZVE32F-NEXT: beqz a2, .LBB115_6
14450 ; RV64ZVE32F-NEXT: .LBB115_14: # %cond.load13
14451 ; RV64ZVE32F-NEXT: lh a2, 18(a0)
14452 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
14453 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14454 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
14455 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14456 ; RV64ZVE32F-NEXT: beqz a2, .LBB115_7
14457 ; RV64ZVE32F-NEXT: .LBB115_15: # %cond.load16
14458 ; RV64ZVE32F-NEXT: lh a2, 20(a0)
14459 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
14460 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14461 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
14462 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14463 ; RV64ZVE32F-NEXT: beqz a1, .LBB115_8
14464 ; RV64ZVE32F-NEXT: .LBB115_16: # %cond.load19
14465 ; RV64ZVE32F-NEXT: lh a0, 22(a0)
14466 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14467 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
14468 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
14469 ; RV64ZVE32F-NEXT: ret
14470 %head = insertelement <8 x i1> poison, i1 true, i16 0
14471 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
14472 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
14473 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %allones, <8 x i16> poison)
14477 ; This is a case where we'd be able to do 4xSEW if we had proper alignment
14478 ; but we only have sufficient alignment for 2xSEW.
14479 define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) {
14480 ; RV32-LABEL: mgather_gather_4xSEW_partial_align:
14482 ; RV32-NEXT: lui a1, 82176
14483 ; RV32-NEXT: addi a1, a1, 1024
14484 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14485 ; RV32-NEXT: vmv.s.x v9, a1
14486 ; RV32-NEXT: vluxei8.v v8, (a0), v9
14489 ; RV64V-LABEL: mgather_gather_4xSEW_partial_align:
14491 ; RV64V-NEXT: lui a1, 82176
14492 ; RV64V-NEXT: addi a1, a1, 1024
14493 ; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
14494 ; RV64V-NEXT: vmv.s.x v9, a1
14495 ; RV64V-NEXT: vluxei8.v v8, (a0), v9
14498 ; RV64ZVE32F-LABEL: mgather_gather_4xSEW_partial_align:
14499 ; RV64ZVE32F: # %bb.0:
14500 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
14501 ; RV64ZVE32F-NEXT: vmset.m v8
14502 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
14503 ; RV64ZVE32F-NEXT: # implicit-def: $v8
14504 ; RV64ZVE32F-NEXT: beqz zero, .LBB116_9
14505 ; RV64ZVE32F-NEXT: # %bb.1: # %else
14506 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14507 ; RV64ZVE32F-NEXT: bnez a2, .LBB116_10
14508 ; RV64ZVE32F-NEXT: .LBB116_2: # %else2
14509 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14510 ; RV64ZVE32F-NEXT: bnez a2, .LBB116_11
14511 ; RV64ZVE32F-NEXT: .LBB116_3: # %else5
14512 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14513 ; RV64ZVE32F-NEXT: bnez a2, .LBB116_12
14514 ; RV64ZVE32F-NEXT: .LBB116_4: # %else8
14515 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14516 ; RV64ZVE32F-NEXT: bnez a2, .LBB116_13
14517 ; RV64ZVE32F-NEXT: .LBB116_5: # %else11
14518 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14519 ; RV64ZVE32F-NEXT: bnez a2, .LBB116_14
14520 ; RV64ZVE32F-NEXT: .LBB116_6: # %else14
14521 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14522 ; RV64ZVE32F-NEXT: bnez a2, .LBB116_15
14523 ; RV64ZVE32F-NEXT: .LBB116_7: # %else17
14524 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14525 ; RV64ZVE32F-NEXT: bnez a1, .LBB116_16
14526 ; RV64ZVE32F-NEXT: .LBB116_8: # %else20
14527 ; RV64ZVE32F-NEXT: ret
14528 ; RV64ZVE32F-NEXT: .LBB116_9: # %cond.load
14529 ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
14530 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14531 ; RV64ZVE32F-NEXT: beqz a2, .LBB116_2
14532 ; RV64ZVE32F-NEXT: .LBB116_10: # %cond.load1
14533 ; RV64ZVE32F-NEXT: lh a2, 2(a0)
14534 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
14535 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14536 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
14537 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
14538 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14539 ; RV64ZVE32F-NEXT: beqz a2, .LBB116_3
14540 ; RV64ZVE32F-NEXT: .LBB116_11: # %cond.load4
14541 ; RV64ZVE32F-NEXT: lh a2, 4(a0)
14542 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
14543 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14544 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
14545 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14546 ; RV64ZVE32F-NEXT: beqz a2, .LBB116_4
14547 ; RV64ZVE32F-NEXT: .LBB116_12: # %cond.load7
14548 ; RV64ZVE32F-NEXT: lh a2, 6(a0)
14549 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
14550 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14551 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
14552 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14553 ; RV64ZVE32F-NEXT: beqz a2, .LBB116_5
14554 ; RV64ZVE32F-NEXT: .LBB116_13: # %cond.load10
14555 ; RV64ZVE32F-NEXT: lh a2, 16(a0)
14556 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
14557 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14558 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
14559 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14560 ; RV64ZVE32F-NEXT: beqz a2, .LBB116_6
14561 ; RV64ZVE32F-NEXT: .LBB116_14: # %cond.load13
14562 ; RV64ZVE32F-NEXT: lh a2, 18(a0)
14563 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
14564 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14565 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
14566 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14567 ; RV64ZVE32F-NEXT: beqz a2, .LBB116_7
14568 ; RV64ZVE32F-NEXT: .LBB116_15: # %cond.load16
14569 ; RV64ZVE32F-NEXT: lh a2, 20(a0)
14570 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
14571 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14572 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
14573 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14574 ; RV64ZVE32F-NEXT: beqz a1, .LBB116_8
14575 ; RV64ZVE32F-NEXT: .LBB116_16: # %cond.load19
14576 ; RV64ZVE32F-NEXT: lh a0, 22(a0)
14577 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14578 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
14579 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
14580 ; RV64ZVE32F-NEXT: ret
14581 %head = insertelement <8 x i1> poison, i1 true, i16 0
14582 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
14583 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
14584 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
14588 define <8 x i16> @mgather_shuffle_reverse(ptr %base) {
14589 ; CHECK-LABEL: mgather_shuffle_reverse:
14591 ; CHECK-NEXT: addi a0, a0, 14
14592 ; CHECK-NEXT: li a1, -2
14593 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14594 ; CHECK-NEXT: vlse16.v v8, (a0), a1
14596 %head = insertelement <8 x i1> poison, i1 true, i16 0
14597 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
14598 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>
14599 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
14603 define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
14604 ; RV32-LABEL: mgather_shuffle_rotate:
14606 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14607 ; RV32-NEXT: vle16.v v9, (a0)
14608 ; RV32-NEXT: vslidedown.vi v8, v9, 4
14609 ; RV32-NEXT: vslideup.vi v8, v9, 4
14612 ; RV64V-LABEL: mgather_shuffle_rotate:
14614 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14615 ; RV64V-NEXT: vle16.v v9, (a0)
14616 ; RV64V-NEXT: vslidedown.vi v8, v9, 4
14617 ; RV64V-NEXT: vslideup.vi v8, v9, 4
14620 ; RV64ZVE32F-LABEL: mgather_shuffle_rotate:
14621 ; RV64ZVE32F: # %bb.0:
14622 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
14623 ; RV64ZVE32F-NEXT: vmset.m v8
14624 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
14625 ; RV64ZVE32F-NEXT: # implicit-def: $v8
14626 ; RV64ZVE32F-NEXT: beqz zero, .LBB118_9
14627 ; RV64ZVE32F-NEXT: # %bb.1: # %else
14628 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14629 ; RV64ZVE32F-NEXT: bnez a2, .LBB118_10
14630 ; RV64ZVE32F-NEXT: .LBB118_2: # %else2
14631 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14632 ; RV64ZVE32F-NEXT: bnez a2, .LBB118_11
14633 ; RV64ZVE32F-NEXT: .LBB118_3: # %else5
14634 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14635 ; RV64ZVE32F-NEXT: bnez a2, .LBB118_12
14636 ; RV64ZVE32F-NEXT: .LBB118_4: # %else8
14637 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14638 ; RV64ZVE32F-NEXT: bnez a2, .LBB118_13
14639 ; RV64ZVE32F-NEXT: .LBB118_5: # %else11
14640 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14641 ; RV64ZVE32F-NEXT: bnez a2, .LBB118_14
14642 ; RV64ZVE32F-NEXT: .LBB118_6: # %else14
14643 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14644 ; RV64ZVE32F-NEXT: bnez a2, .LBB118_15
14645 ; RV64ZVE32F-NEXT: .LBB118_7: # %else17
14646 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14647 ; RV64ZVE32F-NEXT: bnez a1, .LBB118_16
14648 ; RV64ZVE32F-NEXT: .LBB118_8: # %else20
14649 ; RV64ZVE32F-NEXT: ret
14650 ; RV64ZVE32F-NEXT: .LBB118_9: # %cond.load
14651 ; RV64ZVE32F-NEXT: addi a2, a0, 8
14652 ; RV64ZVE32F-NEXT: vlse16.v v8, (a2), zero
14653 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14654 ; RV64ZVE32F-NEXT: beqz a2, .LBB118_2
14655 ; RV64ZVE32F-NEXT: .LBB118_10: # %cond.load1
14656 ; RV64ZVE32F-NEXT: lh a2, 10(a0)
14657 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
14658 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14659 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
14660 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
14661 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14662 ; RV64ZVE32F-NEXT: beqz a2, .LBB118_3
14663 ; RV64ZVE32F-NEXT: .LBB118_11: # %cond.load4
14664 ; RV64ZVE32F-NEXT: lh a2, 12(a0)
14665 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
14666 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14667 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
14668 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14669 ; RV64ZVE32F-NEXT: beqz a2, .LBB118_4
14670 ; RV64ZVE32F-NEXT: .LBB118_12: # %cond.load7
14671 ; RV64ZVE32F-NEXT: lh a2, 14(a0)
14672 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
14673 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14674 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
14675 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14676 ; RV64ZVE32F-NEXT: beqz a2, .LBB118_5
14677 ; RV64ZVE32F-NEXT: .LBB118_13: # %cond.load10
14678 ; RV64ZVE32F-NEXT: lh a2, 0(a0)
14679 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
14680 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14681 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
14682 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14683 ; RV64ZVE32F-NEXT: beqz a2, .LBB118_6
14684 ; RV64ZVE32F-NEXT: .LBB118_14: # %cond.load13
14685 ; RV64ZVE32F-NEXT: lh a2, 2(a0)
14686 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
14687 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14688 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
14689 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14690 ; RV64ZVE32F-NEXT: beqz a2, .LBB118_7
14691 ; RV64ZVE32F-NEXT: .LBB118_15: # %cond.load16
14692 ; RV64ZVE32F-NEXT: lh a2, 4(a0)
14693 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
14694 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14695 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
14696 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14697 ; RV64ZVE32F-NEXT: beqz a1, .LBB118_8
14698 ; RV64ZVE32F-NEXT: .LBB118_16: # %cond.load19
14699 ; RV64ZVE32F-NEXT: lh a0, 6(a0)
14700 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14701 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
14702 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
14703 ; RV64ZVE32F-NEXT: ret
14704 %head = insertelement <8 x i1> poison, i1 true, i16 0
14705 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
14706 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 4, i64 5, i64 6, i64 7, i64 0, i64 1, i64 2, i64 3>
14707 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
14711 define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
14712 ; RV32-LABEL: mgather_shuffle_vrgather:
14714 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14715 ; RV32-NEXT: vle16.v v9, (a0)
14716 ; RV32-NEXT: lui a0, %hi(.LCPI119_0)
14717 ; RV32-NEXT: addi a0, a0, %lo(.LCPI119_0)
14718 ; RV32-NEXT: vle16.v v10, (a0)
14719 ; RV32-NEXT: vrgather.vv v8, v9, v10
14722 ; RV64V-LABEL: mgather_shuffle_vrgather:
14724 ; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14725 ; RV64V-NEXT: vle16.v v9, (a0)
14726 ; RV64V-NEXT: lui a0, %hi(.LCPI119_0)
14727 ; RV64V-NEXT: addi a0, a0, %lo(.LCPI119_0)
14728 ; RV64V-NEXT: vle16.v v10, (a0)
14729 ; RV64V-NEXT: vrgather.vv v8, v9, v10
14732 ; RV64ZVE32F-LABEL: mgather_shuffle_vrgather:
14733 ; RV64ZVE32F: # %bb.0:
14734 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
14735 ; RV64ZVE32F-NEXT: vmset.m v8
14736 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8
14737 ; RV64ZVE32F-NEXT: # implicit-def: $v8
14738 ; RV64ZVE32F-NEXT: beqz zero, .LBB119_9
14739 ; RV64ZVE32F-NEXT: # %bb.1: # %else
14740 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14741 ; RV64ZVE32F-NEXT: bnez a2, .LBB119_10
14742 ; RV64ZVE32F-NEXT: .LBB119_2: # %else2
14743 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14744 ; RV64ZVE32F-NEXT: bnez a2, .LBB119_11
14745 ; RV64ZVE32F-NEXT: .LBB119_3: # %else5
14746 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14747 ; RV64ZVE32F-NEXT: bnez a2, .LBB119_12
14748 ; RV64ZVE32F-NEXT: .LBB119_4: # %else8
14749 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14750 ; RV64ZVE32F-NEXT: bnez a2, .LBB119_13
14751 ; RV64ZVE32F-NEXT: .LBB119_5: # %else11
14752 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14753 ; RV64ZVE32F-NEXT: bnez a2, .LBB119_14
14754 ; RV64ZVE32F-NEXT: .LBB119_6: # %else14
14755 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14756 ; RV64ZVE32F-NEXT: bnez a2, .LBB119_15
14757 ; RV64ZVE32F-NEXT: .LBB119_7: # %else17
14758 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14759 ; RV64ZVE32F-NEXT: bnez a1, .LBB119_16
14760 ; RV64ZVE32F-NEXT: .LBB119_8: # %else20
14761 ; RV64ZVE32F-NEXT: ret
14762 ; RV64ZVE32F-NEXT: .LBB119_9: # %cond.load
14763 ; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
14764 ; RV64ZVE32F-NEXT: andi a2, a1, 2
14765 ; RV64ZVE32F-NEXT: beqz a2, .LBB119_2
14766 ; RV64ZVE32F-NEXT: .LBB119_10: # %cond.load1
14767 ; RV64ZVE32F-NEXT: lh a2, 4(a0)
14768 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
14769 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14770 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
14771 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
14772 ; RV64ZVE32F-NEXT: andi a2, a1, 4
14773 ; RV64ZVE32F-NEXT: beqz a2, .LBB119_3
14774 ; RV64ZVE32F-NEXT: .LBB119_11: # %cond.load4
14775 ; RV64ZVE32F-NEXT: lh a2, 6(a0)
14776 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
14777 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14778 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
14779 ; RV64ZVE32F-NEXT: andi a2, a1, 8
14780 ; RV64ZVE32F-NEXT: beqz a2, .LBB119_4
14781 ; RV64ZVE32F-NEXT: .LBB119_12: # %cond.load7
14782 ; RV64ZVE32F-NEXT: lh a2, 2(a0)
14783 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
14784 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14785 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
14786 ; RV64ZVE32F-NEXT: andi a2, a1, 16
14787 ; RV64ZVE32F-NEXT: beqz a2, .LBB119_5
14788 ; RV64ZVE32F-NEXT: .LBB119_13: # %cond.load10
14789 ; RV64ZVE32F-NEXT: lh a2, 8(a0)
14790 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
14791 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14792 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
14793 ; RV64ZVE32F-NEXT: andi a2, a1, 32
14794 ; RV64ZVE32F-NEXT: beqz a2, .LBB119_6
14795 ; RV64ZVE32F-NEXT: .LBB119_14: # %cond.load13
14796 ; RV64ZVE32F-NEXT: lh a2, 10(a0)
14797 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
14798 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14799 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
14800 ; RV64ZVE32F-NEXT: andi a2, a1, 64
14801 ; RV64ZVE32F-NEXT: beqz a2, .LBB119_7
14802 ; RV64ZVE32F-NEXT: .LBB119_15: # %cond.load16
14803 ; RV64ZVE32F-NEXT: lh a2, 12(a0)
14804 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
14805 ; RV64ZVE32F-NEXT: vmv.s.x v9, a2
14806 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
14807 ; RV64ZVE32F-NEXT: andi a1, a1, -128
14808 ; RV64ZVE32F-NEXT: beqz a1, .LBB119_8
14809 ; RV64ZVE32F-NEXT: .LBB119_16: # %cond.load19
14810 ; RV64ZVE32F-NEXT: lh a0, 14(a0)
14811 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
14812 ; RV64ZVE32F-NEXT: vmv.s.x v9, a0
14813 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
14814 ; RV64ZVE32F-NEXT: ret
14815 %head = insertelement <8 x i1> poison, i1 true, i16 0
14816 %allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
14817 %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> <i64 0, i64 2, i64 3, i64 1, i64 4, i64 5, i64 6, i64 7>
14818 %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
14821 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: