; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

declare <1 x i8> @llvm.masked.expandload.v1i8(ptr, <1 x i1>, <1 x i8>)
define <1 x i8> @expandload_v1i8(ptr %base, <1 x i8> %src0, <1 x i1> %mask) {
; CHECK-LABEL: expandload_v1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vfirst.m a1, v0
; CHECK-NEXT:    bnez a1, .LBB0_2
; CHECK-NEXT:  # %bb.1: # %cond.load
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:  .LBB0_2: # %else
; CHECK-NEXT:    ret
  %res = call <1 x i8> @llvm.masked.expandload.v1i8(ptr %base, <1 x i1> %mask, <1 x i8> %src0)
  ret <1 x i8> %res
}

declare <2 x i8> @llvm.masked.expandload.v2i8(ptr, <2 x i1>, <2 x i8>)
define <2 x i8> @expandload_v2i8(ptr %base, <2 x i8> %src0, <2 x i1> %mask) {
; CHECK-LABEL: expandload_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a1, v0
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    bnez a2, .LBB1_3
; CHECK-NEXT:  # %bb.1: # %else
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    bnez a1, .LBB1_4
; CHECK-NEXT:  .LBB1_2: # %else2
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_3: # %cond.load
; CHECK-NEXT:    lbu a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    beqz a1, .LBB1_2
; CHECK-NEXT:  .LBB1_4: # %cond.load1
; CHECK-NEXT:    lbu a0, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    ret
  %res = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> %mask, <2 x i8> %src0)
  ret <2 x i8> %res
}

declare <4 x i8> @llvm.masked.expandload.v4i8(ptr, <4 x i1>, <4 x i8>)
define <4 x i8> @expandload_v4i8(ptr %base, <4 x i8> %src0, <4 x i1> %mask) {
; CHECK-LABEL: expandload_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a1, v0
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    bnez a2, .LBB2_5
; CHECK-NEXT:  # %bb.1: # %else
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    bnez a2, .LBB2_6
; CHECK-NEXT:  .LBB2_2: # %else2
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    bnez a2, .LBB2_7
; CHECK-NEXT:  .LBB2_3: # %else6
; CHECK-NEXT:    andi a1, a1, 8
; CHECK-NEXT:    bnez a1, .LBB2_8
; CHECK-NEXT:  .LBB2_4: # %else10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB2_5: # %cond.load
; CHECK-NEXT:    lbu a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    beqz a2, .LBB2_2
; CHECK-NEXT:  .LBB2_6: # %cond.load1
; CHECK-NEXT:    lbu a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    beqz a2, .LBB2_3
; CHECK-NEXT:  .LBB2_7: # %cond.load5
; CHECK-NEXT:    lbu a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 3, e8, mf4, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    andi a1, a1, 8
; CHECK-NEXT:    beqz a1, .LBB2_4
; CHECK-NEXT:  .LBB2_8: # %cond.load9
; CHECK-NEXT:    lbu a0, 0(a0)
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 3
; CHECK-NEXT:    ret
  %res = call <4 x i8> @llvm.masked.expandload.v4i8(ptr %base, <4 x i1> %mask, <4 x i8> %src0)
  ret <4 x i8> %res
}

declare <8 x i8> @llvm.masked.expandload.v8i8(ptr, <8 x i1>, <8 x i8>)
define <8 x i8> @expandload_v8i8(ptr %base, <8 x i8> %src0, <8 x i1> %mask) {
; CHECK-LABEL: expandload_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a1, v0
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    bnez a2, .LBB3_9
; CHECK-NEXT:  # %bb.1: # %else
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    bnez a2, .LBB3_10
; CHECK-NEXT:  .LBB3_2: # %else2
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    bnez a2, .LBB3_11
; CHECK-NEXT:  .LBB3_3: # %else6
; CHECK-NEXT:    andi a2, a1, 8
; CHECK-NEXT:    bnez a2, .LBB3_12
; CHECK-NEXT:  .LBB3_4: # %else10
; CHECK-NEXT:    andi a2, a1, 16
; CHECK-NEXT:    bnez a2, .LBB3_13
; CHECK-NEXT:  .LBB3_5: # %else14
; CHECK-NEXT:    andi a2, a1, 32
; CHECK-NEXT:    bnez a2, .LBB3_14
; CHECK-NEXT:  .LBB3_6: # %else18
; CHECK-NEXT:    andi a2, a1, 64
; CHECK-NEXT:    bnez a2, .LBB3_15
; CHECK-NEXT:  .LBB3_7: # %else22
; CHECK-NEXT:    andi a1, a1, -128
; CHECK-NEXT:    bnez a1, .LBB3_16
; CHECK-NEXT:  .LBB3_8: # %else26
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB3_9: # %cond.load
; CHECK-NEXT:    lbu a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    beqz a2, .LBB3_2
; CHECK-NEXT:  .LBB3_10: # %cond.load1
; CHECK-NEXT:    lbu a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    beqz a2, .LBB3_3
; CHECK-NEXT:  .LBB3_11: # %cond.load5
; CHECK-NEXT:    lbu a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 3, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    andi a2, a1, 8
; CHECK-NEXT:    beqz a2, .LBB3_4
; CHECK-NEXT:  .LBB3_12: # %cond.load9
; CHECK-NEXT:    lbu a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 3
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    andi a2, a1, 16
; CHECK-NEXT:    beqz a2, .LBB3_5
; CHECK-NEXT:  .LBB3_13: # %cond.load13
; CHECK-NEXT:    lbu a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 5, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    andi a2, a1, 32
; CHECK-NEXT:    beqz a2, .LBB3_6
; CHECK-NEXT:  .LBB3_14: # %cond.load17
; CHECK-NEXT:    lbu a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    andi a2, a1, 64
; CHECK-NEXT:    beqz a2, .LBB3_7
; CHECK-NEXT:  .LBB3_15: # %cond.load21
; CHECK-NEXT:    lbu a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 7, e8, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 6
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    andi a1, a1, -128
; CHECK-NEXT:    beqz a1, .LBB3_8
; CHECK-NEXT:  .LBB3_16: # %cond.load25
; CHECK-NEXT:    lbu a0, 0(a0)
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 7
; CHECK-NEXT:    ret
  %res = call <8 x i8> @llvm.masked.expandload.v8i8(ptr %base, <8 x i1> %mask, <8 x i8> %src0)
  ret <8 x i8> %res
}

declare <1 x i16> @llvm.masked.expandload.v1i16(ptr, <1 x i1>, <1 x i16>)
define <1 x i16> @expandload_v1i16(ptr %base, <1 x i16> %src0, <1 x i1> %mask) {
; CHECK-LABEL: expandload_v1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vfirst.m a1, v0
; CHECK-NEXT:    bnez a1, .LBB4_2
; CHECK-NEXT:  # %bb.1: # %cond.load
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:  .LBB4_2: # %else
; CHECK-NEXT:    ret
  %res = call <1 x i16> @llvm.masked.expandload.v1i16(ptr align 2 %base, <1 x i1> %mask, <1 x i16> %src0)
  ret <1 x i16> %res
}

declare <2 x i16> @llvm.masked.expandload.v2i16(ptr, <2 x i1>, <2 x i16>)
define <2 x i16> @expandload_v2i16(ptr %base, <2 x i16> %src0, <2 x i1> %mask) {
; CHECK-LABEL: expandload_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a1, v0
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    bnez a2, .LBB5_3
; CHECK-NEXT:  # %bb.1: # %else
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    bnez a1, .LBB5_4
; CHECK-NEXT:  .LBB5_2: # %else2
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB5_3: # %cond.load
; CHECK-NEXT:    lh a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    beqz a1, .LBB5_2
; CHECK-NEXT:  .LBB5_4: # %cond.load1
; CHECK-NEXT:    lh a0, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    ret
  %res = call <2 x i16> @llvm.masked.expandload.v2i16(ptr align 2 %base, <2 x i1> %mask, <2 x i16> %src0)
  ret <2 x i16> %res
}

declare <4 x i16> @llvm.masked.expandload.v4i16(ptr, <4 x i1>, <4 x i16>)
define <4 x i16> @expandload_v4i16(ptr %base, <4 x i16> %src0, <4 x i1> %mask) {
; CHECK-LABEL: expandload_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a1, v0
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    bnez a2, .LBB6_5
; CHECK-NEXT:  # %bb.1: # %else
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    bnez a2, .LBB6_6
; CHECK-NEXT:  .LBB6_2: # %else2
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    bnez a2, .LBB6_7
; CHECK-NEXT:  .LBB6_3: # %else6
; CHECK-NEXT:    andi a1, a1, 8
; CHECK-NEXT:    bnez a1, .LBB6_8
; CHECK-NEXT:  .LBB6_4: # %else10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB6_5: # %cond.load
; CHECK-NEXT:    lh a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    beqz a2, .LBB6_2
; CHECK-NEXT:  .LBB6_6: # %cond.load1
; CHECK-NEXT:    lh a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    beqz a2, .LBB6_3
; CHECK-NEXT:  .LBB6_7: # %cond.load5
; CHECK-NEXT:    lh a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    andi a1, a1, 8
; CHECK-NEXT:    beqz a1, .LBB6_4
; CHECK-NEXT:  .LBB6_8: # %cond.load9
; CHECK-NEXT:    lh a0, 0(a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 3
; CHECK-NEXT:    ret
  %res = call <4 x i16> @llvm.masked.expandload.v4i16(ptr align 2 %base, <4 x i1> %mask, <4 x i16> %src0)
  ret <4 x i16> %res
}

declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>)
define <8 x i16> @expandload_v8i16(ptr %base, <8 x i16> %src0, <8 x i1> %mask) {
; CHECK-LABEL: expandload_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a1, v0
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    bnez a2, .LBB7_9
; CHECK-NEXT:  # %bb.1: # %else
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    bnez a2, .LBB7_10
; CHECK-NEXT:  .LBB7_2: # %else2
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    bnez a2, .LBB7_11
; CHECK-NEXT:  .LBB7_3: # %else6
; CHECK-NEXT:    andi a2, a1, 8
; CHECK-NEXT:    bnez a2, .LBB7_12
; CHECK-NEXT:  .LBB7_4: # %else10
; CHECK-NEXT:    andi a2, a1, 16
; CHECK-NEXT:    bnez a2, .LBB7_13
; CHECK-NEXT:  .LBB7_5: # %else14
; CHECK-NEXT:    andi a2, a1, 32
; CHECK-NEXT:    bnez a2, .LBB7_14
; CHECK-NEXT:  .LBB7_6: # %else18
; CHECK-NEXT:    andi a2, a1, 64
; CHECK-NEXT:    bnez a2, .LBB7_15
; CHECK-NEXT:  .LBB7_7: # %else22
; CHECK-NEXT:    andi a1, a1, -128
; CHECK-NEXT:    bnez a1, .LBB7_16
; CHECK-NEXT:  .LBB7_8: # %else26
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB7_9: # %cond.load
; CHECK-NEXT:    lh a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    beqz a2, .LBB7_2
; CHECK-NEXT:  .LBB7_10: # %cond.load1
; CHECK-NEXT:    lh a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    beqz a2, .LBB7_3
; CHECK-NEXT:  .LBB7_11: # %cond.load5
; CHECK-NEXT:    lh a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    andi a2, a1, 8
; CHECK-NEXT:    beqz a2, .LBB7_4
; CHECK-NEXT:  .LBB7_12: # %cond.load9
; CHECK-NEXT:    lh a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 3
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    andi a2, a1, 16
; CHECK-NEXT:    beqz a2, .LBB7_5
; CHECK-NEXT:  .LBB7_13: # %cond.load13
; CHECK-NEXT:    lh a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    andi a2, a1, 32
; CHECK-NEXT:    beqz a2, .LBB7_6
; CHECK-NEXT:  .LBB7_14: # %cond.load17
; CHECK-NEXT:    lh a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 5
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    andi a2, a1, 64
; CHECK-NEXT:    beqz a2, .LBB7_7
; CHECK-NEXT:  .LBB7_15: # %cond.load21
; CHECK-NEXT:    lh a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 6
; CHECK-NEXT:    addi a0, a0, 2
; CHECK-NEXT:    andi a1, a1, -128
; CHECK-NEXT:    beqz a1, .LBB7_8
; CHECK-NEXT:  .LBB7_16: # %cond.load25
; CHECK-NEXT:    lh a0, 0(a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 7
; CHECK-NEXT:    ret
  %res = call <8 x i16> @llvm.masked.expandload.v8i16(ptr align 2 %base, <8 x i1> %mask, <8 x i16> %src0)
  ret <8 x i16> %res
}

declare <1 x i32> @llvm.masked.expandload.v1i32(ptr, <1 x i1>, <1 x i32>)
define <1 x i32> @expandload_v1i32(ptr %base, <1 x i32> %src0, <1 x i1> %mask) {
; CHECK-LABEL: expandload_v1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vfirst.m a1, v0
; CHECK-NEXT:    bnez a1, .LBB8_2
; CHECK-NEXT:  # %bb.1: # %cond.load
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:  .LBB8_2: # %else
; CHECK-NEXT:    ret
  %res = call <1 x i32> @llvm.masked.expandload.v1i32(ptr align 4 %base, <1 x i1> %mask, <1 x i32> %src0)
  ret <1 x i32> %res
}

declare <2 x i32> @llvm.masked.expandload.v2i32(ptr, <2 x i1>, <2 x i32>)
define <2 x i32> @expandload_v2i32(ptr %base, <2 x i32> %src0, <2 x i1> %mask) {
; CHECK-LABEL: expandload_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a1, v0
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    bnez a2, .LBB9_3
; CHECK-NEXT:  # %bb.1: # %else
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    bnez a1, .LBB9_4
; CHECK-NEXT:  .LBB9_2: # %else2
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB9_3: # %cond.load
; CHECK-NEXT:    lw a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    andi a1, a1, 2
; CHECK-NEXT:    beqz a1, .LBB9_2
; CHECK-NEXT:  .LBB9_4: # %cond.load1
; CHECK-NEXT:    lw a0, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    ret
  %res = call <2 x i32> @llvm.masked.expandload.v2i32(ptr align 4 %base, <2 x i1> %mask, <2 x i32> %src0)
  ret <2 x i32> %res
}

declare <4 x i32> @llvm.masked.expandload.v4i32(ptr, <4 x i1>, <4 x i32>)
define <4 x i32> @expandload_v4i32(ptr %base, <4 x i32> %src0, <4 x i1> %mask) {
; CHECK-LABEL: expandload_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a1, v0
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    bnez a2, .LBB10_5
; CHECK-NEXT:  # %bb.1: # %else
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    bnez a2, .LBB10_6
; CHECK-NEXT:  .LBB10_2: # %else2
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    bnez a2, .LBB10_7
; CHECK-NEXT:  .LBB10_3: # %else6
; CHECK-NEXT:    andi a1, a1, 8
; CHECK-NEXT:    bnez a1, .LBB10_8
; CHECK-NEXT:  .LBB10_4: # %else10
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB10_5: # %cond.load
; CHECK-NEXT:    lw a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    beqz a2, .LBB10_2
; CHECK-NEXT:  .LBB10_6: # %cond.load1
; CHECK-NEXT:    lw a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 1
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    beqz a2, .LBB10_3
; CHECK-NEXT:  .LBB10_7: # %cond.load5
; CHECK-NEXT:    lw a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v9, a2
; CHECK-NEXT:    vslideup.vi v8, v9, 2
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    andi a1, a1, 8
; CHECK-NEXT:    beqz a1, .LBB10_4
; CHECK-NEXT:  .LBB10_8: # %cond.load9
; CHECK-NEXT:    lw a0, 0(a0)
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vmv.s.x v9, a0
; CHECK-NEXT:    vslideup.vi v8, v9, 3
; CHECK-NEXT:    ret
  %res = call <4 x i32> @llvm.masked.expandload.v4i32(ptr align 4 %base, <4 x i1> %mask, <4 x i32> %src0)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.masked.expandload.v8i32(ptr, <8 x i1>, <8 x i32>)
define <8 x i32> @expandload_v8i32(ptr %base, <8 x i32> %src0, <8 x i1> %mask) {
; CHECK-LABEL: expandload_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a1, v0
; CHECK-NEXT:    andi a2, a1, 1
; CHECK-NEXT:    bnez a2, .LBB11_9
; CHECK-NEXT:  # %bb.1: # %else
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    bnez a2, .LBB11_10
; CHECK-NEXT:  .LBB11_2: # %else2
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    bnez a2, .LBB11_11
; CHECK-NEXT:  .LBB11_3: # %else6
; CHECK-NEXT:    andi a2, a1, 8
; CHECK-NEXT:    bnez a2, .LBB11_12
; CHECK-NEXT:  .LBB11_4: # %else10
; CHECK-NEXT:    andi a2, a1, 16
; CHECK-NEXT:    bnez a2, .LBB11_13
; CHECK-NEXT:  .LBB11_5: # %else14
; CHECK-NEXT:    andi a2, a1, 32
; CHECK-NEXT:    bnez a2, .LBB11_14
; CHECK-NEXT:  .LBB11_6: # %else18
; CHECK-NEXT:    andi a2, a1, 64
; CHECK-NEXT:    bnez a2, .LBB11_15
; CHECK-NEXT:  .LBB11_7: # %else22
; CHECK-NEXT:    andi a1, a1, -128
; CHECK-NEXT:    bnez a1, .LBB11_16
; CHECK-NEXT:  .LBB11_8: # %else26
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB11_9: # %cond.load
; CHECK-NEXT:    lw a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
; CHECK-NEXT:    vmv.s.x v8, a2
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    andi a2, a1, 2
; CHECK-NEXT:    beqz a2, .LBB11_2
; CHECK-NEXT:  .LBB11_10: # %cond.load1
; CHECK-NEXT:    lw a2, 0(a0)
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 1
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    andi a2, a1, 4
; CHECK-NEXT:    beqz a2, .LBB11_3
; CHECK-NEXT:  .LBB11_11: # %cond.load5
; CHECK-NEXT:    lw a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 2
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    andi a2, a1, 8
; CHECK-NEXT:    beqz a2, .LBB11_4
; CHECK-NEXT:  .LBB11_12: # %cond.load9
; CHECK-NEXT:    lw a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 3
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    andi a2, a1, 16
; CHECK-NEXT:    beqz a2, .LBB11_5
; CHECK-NEXT:  .LBB11_13: # %cond.load13
; CHECK-NEXT:    lw a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    andi a2, a1, 32
; CHECK-NEXT:    beqz a2, .LBB11_6
; CHECK-NEXT:  .LBB11_14: # %cond.load17
; CHECK-NEXT:    lw a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 5
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    andi a2, a1, 64
; CHECK-NEXT:    beqz a2, .LBB11_7
; CHECK-NEXT:  .LBB11_15: # %cond.load21
; CHECK-NEXT:    lw a2, 0(a0)
; CHECK-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
; CHECK-NEXT:    vmv.s.x v10, a2
; CHECK-NEXT:    vslideup.vi v8, v10, 6
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    andi a1, a1, -128
; CHECK-NEXT:    beqz a1, .LBB11_8
; CHECK-NEXT:  .LBB11_16: # %cond.load25
; CHECK-NEXT:    lw a0, 0(a0)
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a0
; CHECK-NEXT:    vslideup.vi v8, v10, 7
; CHECK-NEXT:    ret
  %res = call <8 x i32> @llvm.masked.expandload.v8i32(ptr align 4 %base, <8 x i1> %mask, <8 x i32> %src0)
  ret <8 x i32> %res
}

declare <1 x i64> @llvm.masked.expandload.v1i64(ptr, <1 x i1>, <1 x i64>)
define <1 x i64> @expandload_v1i64(ptr %base, <1 x i64> %src0, <1 x i1> %mask) {
; RV32-LABEL: expandload_v1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; RV32-NEXT:    vfirst.m a1, v0
; RV32-NEXT:    bnez a1, .LBB12_2
; RV32-NEXT:  # %bb.1: # %cond.load
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    lw a1, 4(a0)
; RV32-NEXT:    lw a0, 0(a0)
; RV32-NEXT:    sw a1, 12(sp)
; RV32-NEXT:    sw a0, 8(sp)
; RV32-NEXT:    addi a0, sp, 8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vlse64.v v8, (a0), zero
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:  .LBB12_2: # %else
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; RV64-NEXT:    vfirst.m a1, v0
; RV64-NEXT:    bnez a1, .LBB12_2
; RV64-NEXT:  # %bb.1: # %cond.load
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:  .LBB12_2: # %else
; RV64-NEXT:    ret
  %res = call <1 x i64> @llvm.masked.expandload.v1i64(ptr align 8 %base, <1 x i1> %mask, <1 x i64> %src0)
  ret <1 x i64> %res
}

declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>)
define <2 x i64> @expandload_v2i64(ptr %base, <2 x i64> %src0, <2 x i1> %mask) {
; RV32-LABEL: expandload_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB13_3
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a1, a1, 2
; RV32-NEXT:    bnez a1, .LBB13_4
; RV32-NEXT:  .LBB13_2: # %else2
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB13_3: # %cond.load
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a1, a1, 2
; RV32-NEXT:    beqz a1, .LBB13_2
; RV32-NEXT:  .LBB13_4: # %cond.load1
; RV32-NEXT:    lw a1, 0(a0)
; RV32-NEXT:    lw a0, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v9, v8, a1
; RV32-NEXT:    vslide1down.vx v9, v9, a0
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB13_3
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a1, a1, 2
; RV64-NEXT:    bnez a1, .LBB13_4
; RV64-NEXT:  .LBB13_2: # %else2
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB13_3: # %cond.load
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV64-NEXT:    vmv.s.x v8, a2
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a1, a1, 2
; RV64-NEXT:    beqz a1, .LBB13_2
; RV64-NEXT:  .LBB13_4: # %cond.load1
; RV64-NEXT:    ld a0, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vmv.s.x v9, a0
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    ret
  %res = call <2 x i64> @llvm.masked.expandload.v2i64(ptr align 8 %base, <2 x i1> %mask, <2 x i64> %src0)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.masked.expandload.v4i64(ptr, <4 x i1>, <4 x i64>)
define <4 x i64> @expandload_v4i64(ptr %base, <4 x i64> %src0, <4 x i1> %mask) {
; RV32-LABEL: expandload_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB14_5
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    bnez a2, .LBB14_6
; RV32-NEXT:  .LBB14_2: # %else2
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    bnez a2, .LBB14_7
; RV32-NEXT:  .LBB14_3: # %else6
; RV32-NEXT:    andi a1, a1, 8
; RV32-NEXT:    bnez a1, .LBB14_8
; RV32-NEXT:  .LBB14_4: # %else10
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB14_5: # %cond.load
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    beqz a2, .LBB14_2
; RV32-NEXT:  .LBB14_6: # %cond.load1
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v8, a2
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV32-NEXT:    vslideup.vi v8, v10, 1
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    beqz a2, .LBB14_3
; RV32-NEXT:  .LBB14_7: # %cond.load5
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v8, a2
; RV32-NEXT:    vslide1down.vx v10, v10, a3
; RV32-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a1, a1, 8
; RV32-NEXT:    beqz a1, .LBB14_4
; RV32-NEXT:  .LBB14_8: # %cond.load9
; RV32-NEXT:    lw a1, 0(a0)
; RV32-NEXT:    lw a0, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v10, v8, a1
; RV32-NEXT:    vslide1down.vx v10, v10, a0
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v10, 3
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB14_5
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    bnez a2, .LBB14_6
; RV64-NEXT:  .LBB14_2: # %else2
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    bnez a2, .LBB14_7
; RV64-NEXT:  .LBB14_3: # %else6
; RV64-NEXT:    andi a1, a1, 8
; RV64-NEXT:    bnez a1, .LBB14_8
; RV64-NEXT:  .LBB14_4: # %else10
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB14_5: # %cond.load
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV64-NEXT:    vmv.s.x v8, a2
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    beqz a2, .LBB14_2
; RV64-NEXT:  .LBB14_6: # %cond.load1
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vmv.s.x v10, a2
; RV64-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV64-NEXT:    vslideup.vi v8, v10, 1
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    beqz a2, .LBB14_3
; RV64-NEXT:  .LBB14_7: # %cond.load5
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
; RV64-NEXT:    vmv.s.x v10, a2
; RV64-NEXT:    vslideup.vi v8, v10, 2
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a1, a1, 8
; RV64-NEXT:    beqz a1, .LBB14_4
; RV64-NEXT:  .LBB14_8: # %cond.load9
; RV64-NEXT:    ld a0, 0(a0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vmv.s.x v10, a0
; RV64-NEXT:    vslideup.vi v8, v10, 3
; RV64-NEXT:    ret
  %res = call <4 x i64> @llvm.masked.expandload.v4i64(ptr align 8 %base, <4 x i1> %mask, <4 x i64> %src0)
  ret <4 x i64> %res
}

declare <8 x i64> @llvm.masked.expandload.v8i64(ptr, <8 x i1>, <8 x i64>)
define <8 x i64> @expandload_v8i64(ptr %base, <8 x i64> %src0, <8 x i1> %mask) {
; RV32-LABEL: expandload_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB15_9
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    bnez a2, .LBB15_10
; RV32-NEXT:  .LBB15_2: # %else2
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    bnez a2, .LBB15_11
; RV32-NEXT:  .LBB15_3: # %else6
; RV32-NEXT:    andi a2, a1, 8
; RV32-NEXT:    bnez a2, .LBB15_12
; RV32-NEXT:  .LBB15_4: # %else10
; RV32-NEXT:    andi a2, a1, 16
; RV32-NEXT:    bnez a2, .LBB15_13
; RV32-NEXT:  .LBB15_5: # %else14
; RV32-NEXT:    andi a2, a1, 32
; RV32-NEXT:    bnez a2, .LBB15_14
; RV32-NEXT:  .LBB15_6: # %else18
; RV32-NEXT:    andi a2, a1, 64
; RV32-NEXT:    bnez a2, .LBB15_15
; RV32-NEXT:  .LBB15_7: # %else22
; RV32-NEXT:    andi a1, a1, -128
; RV32-NEXT:    bnez a1, .LBB15_16
; RV32-NEXT:  .LBB15_8: # %else26
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB15_9: # %cond.load
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslide1down.vx v8, v8, a2
; RV32-NEXT:    vslide1down.vx v8, v8, a3
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    beqz a2, .LBB15_2
; RV32-NEXT:  .LBB15_10: # %cond.load1
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
; RV32-NEXT:    vslide1down.vx v12, v8, a2
; RV32-NEXT:    vslide1down.vx v12, v12, a3
; RV32-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV32-NEXT:    vslideup.vi v8, v12, 1
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    beqz a2, .LBB15_3
; RV32-NEXT:  .LBB15_11: # %cond.load5
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v12, v8, a2
; RV32-NEXT:    vslide1down.vx v12, v12, a3
; RV32-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
; RV32-NEXT:    vslideup.vi v8, v12, 2
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 8
; RV32-NEXT:    beqz a2, .LBB15_4
; RV32-NEXT:  .LBB15_12: # %cond.load9
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m2, ta, ma
; RV32-NEXT:    vslide1down.vx v12, v8, a2
; RV32-NEXT:    vslide1down.vx v12, v12, a3
; RV32-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
; RV32-NEXT:    vslideup.vi v8, v12, 3
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 16
; RV32-NEXT:    beqz a2, .LBB15_5
; RV32-NEXT:  .LBB15_13: # %cond.load13
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m4, ta, ma
; RV32-NEXT:    vslide1down.vx v12, v8, a2
; RV32-NEXT:    vslide1down.vx v12, v12, a3
; RV32-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
; RV32-NEXT:    vslideup.vi v8, v12, 4
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 32
; RV32-NEXT:    beqz a2, .LBB15_6
; RV32-NEXT:  .LBB15_14: # %cond.load17
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m4, ta, ma
; RV32-NEXT:    vslide1down.vx v12, v8, a2
; RV32-NEXT:    vslide1down.vx v12, v12, a3
; RV32-NEXT:    vsetivli zero, 6, e64, m4, tu, ma
; RV32-NEXT:    vslideup.vi v8, v12, 5
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 64
; RV32-NEXT:    beqz a2, .LBB15_7
; RV32-NEXT:  .LBB15_15: # %cond.load21
; RV32-NEXT:    lw a2, 0(a0)
; RV32-NEXT:    lw a3, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m4, ta, ma
; RV32-NEXT:    vslide1down.vx v12, v8, a2
; RV32-NEXT:    vslide1down.vx v12, v12, a3
; RV32-NEXT:    vsetivli zero, 7, e64, m4, tu, ma
; RV32-NEXT:    vslideup.vi v8, v12, 6
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a1, a1, -128
; RV32-NEXT:    beqz a1, .LBB15_8
; RV32-NEXT:  .LBB15_16: # %cond.load25
; RV32-NEXT:    lw a1, 0(a0)
; RV32-NEXT:    lw a0, 4(a0)
; RV32-NEXT:    vsetivli zero, 2, e32, m4, ta, ma
; RV32-NEXT:    vslide1down.vx v12, v8, a1
; RV32-NEXT:    vslide1down.vx v12, v12, a0
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vslideup.vi v8, v12, 7
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB15_9
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    bnez a2, .LBB15_10
; RV64-NEXT:  .LBB15_2: # %else2
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    bnez a2, .LBB15_11
; RV64-NEXT:  .LBB15_3: # %else6
; RV64-NEXT:    andi a2, a1, 8
; RV64-NEXT:    bnez a2, .LBB15_12
; RV64-NEXT:  .LBB15_4: # %else10
; RV64-NEXT:    andi a2, a1, 16
; RV64-NEXT:    bnez a2, .LBB15_13
; RV64-NEXT:  .LBB15_5: # %else14
; RV64-NEXT:    andi a2, a1, 32
; RV64-NEXT:    bnez a2, .LBB15_14
; RV64-NEXT:  .LBB15_6: # %else18
; RV64-NEXT:    andi a2, a1, 64
; RV64-NEXT:    bnez a2, .LBB15_15
; RV64-NEXT:  .LBB15_7: # %else22
; RV64-NEXT:    andi a1, a1, -128
; RV64-NEXT:    bnez a1, .LBB15_16
; RV64-NEXT:  .LBB15_8: # %else26
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB15_9: # %cond.load
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV64-NEXT:    vmv.s.x v8, a2
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    beqz a2, .LBB15_2
; RV64-NEXT:  .LBB15_10: # %cond.load1
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vmv.s.x v12, a2
; RV64-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV64-NEXT:    vslideup.vi v8, v12, 1
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    beqz a2, .LBB15_3
; RV64-NEXT:  .LBB15_11: # %cond.load5
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
; RV64-NEXT:    vmv.s.x v12, a2
; RV64-NEXT:    vslideup.vi v8, v12, 2
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 8
; RV64-NEXT:    beqz a2, .LBB15_4
; RV64-NEXT:  .LBB15_12: # %cond.load9
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
; RV64-NEXT:    vmv.s.x v12, a2
; RV64-NEXT:    vslideup.vi v8, v12, 3
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 16
; RV64-NEXT:    beqz a2, .LBB15_5
; RV64-NEXT:  .LBB15_13: # %cond.load13
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT:    vmv.s.x v12, a2
; RV64-NEXT:    vslideup.vi v8, v12, 4
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 32
; RV64-NEXT:    beqz a2, .LBB15_6
; RV64-NEXT:  .LBB15_14: # %cond.load17
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    vsetivli zero, 6, e64, m4, tu, ma
; RV64-NEXT:    vmv.s.x v12, a2
; RV64-NEXT:    vslideup.vi v8, v12, 5
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 64
; RV64-NEXT:    beqz a2, .LBB15_7
; RV64-NEXT:  .LBB15_15: # %cond.load21
; RV64-NEXT:    ld a2, 0(a0)
; RV64-NEXT:    vsetivli zero, 7, e64, m4, tu, ma
; RV64-NEXT:    vmv.s.x v12, a2
; RV64-NEXT:    vslideup.vi v8, v12, 6
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a1, a1, -128
; RV64-NEXT:    beqz a1, .LBB15_8
; RV64-NEXT:  .LBB15_16: # %cond.load25
; RV64-NEXT:    ld a0, 0(a0)
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vmv.s.x v12, a0
; RV64-NEXT:    vslideup.vi v8, v12, 7
; RV64-NEXT:    ret
  %res = call <8 x i64> @llvm.masked.expandload.v8i64(ptr align 8 %base, <8 x i1> %mask, <8 x i64> %src0)