; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

declare <1 x half> @llvm.masked.expandload.v1f16(ptr, <1 x i1>, <1 x half>)
define <1 x half> @expandload_v1f16(ptr %base, <1 x half> %src0, <1 x i1> %mask) {
; RV32-LABEL: expandload_v1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; RV32-NEXT:    vfirst.m a1, v0
; RV32-NEXT:    bnez a1, .LBB0_2
; RV32-NEXT:  # %bb.1: # %cond.load
; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; RV32-NEXT:    vle16.v v8, (a0)
; RV32-NEXT:  .LBB0_2: # %else
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; RV64-NEXT:    vfirst.m a1, v0
; RV64-NEXT:    bnez a1, .LBB0_2
; RV64-NEXT:  # %bb.1: # %cond.load
; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; RV64-NEXT:    vle16.v v8, (a0)
; RV64-NEXT:  .LBB0_2: # %else
; RV64-NEXT:    ret
  %res = call <1 x half> @llvm.masked.expandload.v1f16(ptr align 2 %base, <1 x i1> %mask, <1 x half> %src0)
  ret <1 x half> %res
}

declare <2 x half> @llvm.masked.expandload.v2f16(ptr, <2 x i1>, <2 x half>)
define <2 x half> @expandload_v2f16(ptr %base, <2 x half> %src0, <2 x i1> %mask) {
; RV32-LABEL: expandload_v2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB1_3
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a1, a1, 2
; RV32-NEXT:    bnez a1, .LBB1_4
; RV32-NEXT:  .LBB1_2: # %else2
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB1_3: # %cond.load
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV32-NEXT:    vfmv.s.f v8, fa5
; RV32-NEXT:    addi a0, a0, 2
; RV32-NEXT:    andi a1, a1, 2
; RV32-NEXT:    beqz a1, .LBB1_2
; RV32-NEXT:  .LBB1_4: # %cond.load1
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB1_3
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a1, a1, 2
; RV64-NEXT:    bnez a1, .LBB1_4
; RV64-NEXT:  .LBB1_2: # %else2
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB1_3: # %cond.load
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV64-NEXT:    vfmv.s.f v8, fa5
; RV64-NEXT:    addi a0, a0, 2
; RV64-NEXT:    andi a1, a1, 2
; RV64-NEXT:    beqz a1, .LBB1_2
; RV64-NEXT:  .LBB1_4: # %cond.load1
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    ret
  %res = call <2 x half> @llvm.masked.expandload.v2f16(ptr align 2 %base, <2 x i1> %mask, <2 x half> %src0)
  ret <2 x half> %res
}

declare <4 x half> @llvm.masked.expandload.v4f16(ptr, <4 x i1>, <4 x half>)
define <4 x half> @expandload_v4f16(ptr %base, <4 x half> %src0, <4 x i1> %mask) {
; RV32-LABEL: expandload_v4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB2_5
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    bnez a2, .LBB2_6
; RV32-NEXT:  .LBB2_2: # %else2
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    bnez a2, .LBB2_7
; RV32-NEXT:  .LBB2_3: # %else6
; RV32-NEXT:    andi a1, a1, 8
; RV32-NEXT:    bnez a1, .LBB2_8
; RV32-NEXT:  .LBB2_4: # %else10
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB2_5: # %cond.load
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV32-NEXT:    vfmv.s.f v8, fa5
; RV32-NEXT:    addi a0, a0, 2
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    beqz a2, .LBB2_2
; RV32-NEXT:  .LBB2_6: # %cond.load1
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    addi a0, a0, 2
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    beqz a2, .LBB2_3
; RV32-NEXT:  .LBB2_7: # %cond.load5
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vslideup.vi v8, v9, 2
; RV32-NEXT:    addi a0, a0, 2
; RV32-NEXT:    andi a1, a1, 8
; RV32-NEXT:    beqz a1, .LBB2_4
; RV32-NEXT:  .LBB2_8: # %cond.load9
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vslideup.vi v8, v9, 3
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB2_5
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    bnez a2, .LBB2_6
; RV64-NEXT:  .LBB2_2: # %else2
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    bnez a2, .LBB2_7
; RV64-NEXT:  .LBB2_3: # %else6
; RV64-NEXT:    andi a1, a1, 8
; RV64-NEXT:    bnez a1, .LBB2_8
; RV64-NEXT:  .LBB2_4: # %else10
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB2_5: # %cond.load
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV64-NEXT:    vfmv.s.f v8, fa5
; RV64-NEXT:    addi a0, a0, 2
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    beqz a2, .LBB2_2
; RV64-NEXT:  .LBB2_6: # %cond.load1
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    addi a0, a0, 2
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    beqz a2, .LBB2_3
; RV64-NEXT:  .LBB2_7: # %cond.load5
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 3, e16, mf2, tu, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vslideup.vi v8, v9, 2
; RV64-NEXT:    addi a0, a0, 2
; RV64-NEXT:    andi a1, a1, 8
; RV64-NEXT:    beqz a1, .LBB2_4
; RV64-NEXT:  .LBB2_8: # %cond.load9
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vslideup.vi v8, v9, 3
; RV64-NEXT:    ret
  %res = call <4 x half> @llvm.masked.expandload.v4f16(ptr align 2 %base, <4 x i1> %mask, <4 x half> %src0)
  ret <4 x half> %res
}

declare <8 x half> @llvm.masked.expandload.v8f16(ptr, <8 x i1>, <8 x half>)
define <8 x half> @expandload_v8f16(ptr %base, <8 x half> %src0, <8 x i1> %mask) {
; RV32-LABEL: expandload_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB3_9
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    bnez a2, .LBB3_10
; RV32-NEXT:  .LBB3_2: # %else2
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    bnez a2, .LBB3_11
; RV32-NEXT:  .LBB3_3: # %else6
; RV32-NEXT:    andi a2, a1, 8
; RV32-NEXT:    bnez a2, .LBB3_12
; RV32-NEXT:  .LBB3_4: # %else10
; RV32-NEXT:    andi a2, a1, 16
; RV32-NEXT:    bnez a2, .LBB3_13
; RV32-NEXT:  .LBB3_5: # %else14
; RV32-NEXT:    andi a2, a1, 32
; RV32-NEXT:    bnez a2, .LBB3_14
; RV32-NEXT:  .LBB3_6: # %else18
; RV32-NEXT:    andi a2, a1, 64
; RV32-NEXT:    bnez a2, .LBB3_15
; RV32-NEXT:  .LBB3_7: # %else22
; RV32-NEXT:    andi a1, a1, -128
; RV32-NEXT:    bnez a1, .LBB3_16
; RV32-NEXT:  .LBB3_8: # %else26
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB3_9: # %cond.load
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV32-NEXT:    vfmv.s.f v8, fa5
; RV32-NEXT:    addi a0, a0, 2
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    beqz a2, .LBB3_2
; RV32-NEXT:  .LBB3_10: # %cond.load1
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    addi a0, a0, 2
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    beqz a2, .LBB3_3
; RV32-NEXT:  .LBB3_11: # %cond.load5
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vslideup.vi v8, v9, 2
; RV32-NEXT:    addi a0, a0, 2
; RV32-NEXT:    andi a2, a1, 8
; RV32-NEXT:    beqz a2, .LBB3_4
; RV32-NEXT:  .LBB3_12: # %cond.load9
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vslideup.vi v8, v9, 3
; RV32-NEXT:    addi a0, a0, 2
; RV32-NEXT:    andi a2, a1, 16
; RV32-NEXT:    beqz a2, .LBB3_5
; RV32-NEXT:  .LBB3_13: # %cond.load13
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vslideup.vi v8, v9, 4
; RV32-NEXT:    addi a0, a0, 2
; RV32-NEXT:    andi a2, a1, 32
; RV32-NEXT:    beqz a2, .LBB3_6
; RV32-NEXT:  .LBB3_14: # %cond.load17
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vslideup.vi v8, v9, 5
; RV32-NEXT:    addi a0, a0, 2
; RV32-NEXT:    andi a2, a1, 64
; RV32-NEXT:    beqz a2, .LBB3_7
; RV32-NEXT:  .LBB3_15: # %cond.load21
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vslideup.vi v8, v9, 6
; RV32-NEXT:    addi a0, a0, 2
; RV32-NEXT:    andi a1, a1, -128
; RV32-NEXT:    beqz a1, .LBB3_8
; RV32-NEXT:  .LBB3_16: # %cond.load25
; RV32-NEXT:    flh fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vslideup.vi v8, v9, 7
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB3_9
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    bnez a2, .LBB3_10
; RV64-NEXT:  .LBB3_2: # %else2
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    bnez a2, .LBB3_11
; RV64-NEXT:  .LBB3_3: # %else6
; RV64-NEXT:    andi a2, a1, 8
; RV64-NEXT:    bnez a2, .LBB3_12
; RV64-NEXT:  .LBB3_4: # %else10
; RV64-NEXT:    andi a2, a1, 16
; RV64-NEXT:    bnez a2, .LBB3_13
; RV64-NEXT:  .LBB3_5: # %else14
; RV64-NEXT:    andi a2, a1, 32
; RV64-NEXT:    bnez a2, .LBB3_14
; RV64-NEXT:  .LBB3_6: # %else18
; RV64-NEXT:    andi a2, a1, 64
; RV64-NEXT:    bnez a2, .LBB3_15
; RV64-NEXT:  .LBB3_7: # %else22
; RV64-NEXT:    andi a1, a1, -128
; RV64-NEXT:    bnez a1, .LBB3_16
; RV64-NEXT:  .LBB3_8: # %else26
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB3_9: # %cond.load
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e16, m2, tu, ma
; RV64-NEXT:    vfmv.s.f v8, fa5
; RV64-NEXT:    addi a0, a0, 2
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    beqz a2, .LBB3_2
; RV64-NEXT:  .LBB3_10: # %cond.load1
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    addi a0, a0, 2
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    beqz a2, .LBB3_3
; RV64-NEXT:  .LBB3_11: # %cond.load5
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 3, e16, m1, tu, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vslideup.vi v8, v9, 2
; RV64-NEXT:    addi a0, a0, 2
; RV64-NEXT:    andi a2, a1, 8
; RV64-NEXT:    beqz a2, .LBB3_4
; RV64-NEXT:  .LBB3_12: # %cond.load9
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 4, e16, m1, tu, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vslideup.vi v8, v9, 3
; RV64-NEXT:    addi a0, a0, 2
; RV64-NEXT:    andi a2, a1, 16
; RV64-NEXT:    beqz a2, .LBB3_5
; RV64-NEXT:  .LBB3_13: # %cond.load13
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 5, e16, m1, tu, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vslideup.vi v8, v9, 4
; RV64-NEXT:    addi a0, a0, 2
; RV64-NEXT:    andi a2, a1, 32
; RV64-NEXT:    beqz a2, .LBB3_6
; RV64-NEXT:  .LBB3_14: # %cond.load17
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 6, e16, m1, tu, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vslideup.vi v8, v9, 5
; RV64-NEXT:    addi a0, a0, 2
; RV64-NEXT:    andi a2, a1, 64
; RV64-NEXT:    beqz a2, .LBB3_7
; RV64-NEXT:  .LBB3_15: # %cond.load21
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vslideup.vi v8, v9, 6
; RV64-NEXT:    addi a0, a0, 2
; RV64-NEXT:    andi a1, a1, -128
; RV64-NEXT:    beqz a1, .LBB3_8
; RV64-NEXT:  .LBB3_16: # %cond.load25
; RV64-NEXT:    flh fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vslideup.vi v8, v9, 7
; RV64-NEXT:    ret
  %res = call <8 x half> @llvm.masked.expandload.v8f16(ptr align 2 %base, <8 x i1> %mask, <8 x half> %src0)
  ret <8 x half> %res
}

declare <1 x float> @llvm.masked.expandload.v1f32(ptr, <1 x i1>, <1 x float>)
define <1 x float> @expandload_v1f32(ptr %base, <1 x float> %src0, <1 x i1> %mask) {
; RV32-LABEL: expandload_v1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; RV32-NEXT:    vfirst.m a1, v0
; RV32-NEXT:    bnez a1, .LBB4_2
; RV32-NEXT:  # %bb.1: # %cond.load
; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:  .LBB4_2: # %else
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; RV64-NEXT:    vfirst.m a1, v0
; RV64-NEXT:    bnez a1, .LBB4_2
; RV64-NEXT:  # %bb.1: # %cond.load
; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:  .LBB4_2: # %else
; RV64-NEXT:    ret
  %res = call <1 x float> @llvm.masked.expandload.v1f32(ptr align 4 %base, <1 x i1> %mask, <1 x float> %src0)
  ret <1 x float> %res
}

declare <2 x float> @llvm.masked.expandload.v2f32(ptr, <2 x i1>, <2 x float>)
define <2 x float> @expandload_v2f32(ptr %base, <2 x float> %src0, <2 x i1> %mask) {
; RV32-LABEL: expandload_v2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB5_3
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a1, a1, 2
; RV32-NEXT:    bnez a1, .LBB5_4
; RV32-NEXT:  .LBB5_2: # %else2
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB5_3: # %cond.load
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
; RV32-NEXT:    vfmv.s.f v8, fa5
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    andi a1, a1, 2
; RV32-NEXT:    beqz a1, .LBB5_2
; RV32-NEXT:  .LBB5_4: # %cond.load1
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB5_3
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a1, a1, 2
; RV64-NEXT:    bnez a1, .LBB5_4
; RV64-NEXT:  .LBB5_2: # %else2
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB5_3: # %cond.load
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
; RV64-NEXT:    vfmv.s.f v8, fa5
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    andi a1, a1, 2
; RV64-NEXT:    beqz a1, .LBB5_2
; RV64-NEXT:  .LBB5_4: # %cond.load1
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    ret
  %res = call <2 x float> @llvm.masked.expandload.v2f32(ptr align 4 %base, <2 x i1> %mask, <2 x float> %src0)
  ret <2 x float> %res
}

declare <4 x float> @llvm.masked.expandload.v4f32(ptr, <4 x i1>, <4 x float>)
define <4 x float> @expandload_v4f32(ptr %base, <4 x float> %src0, <4 x i1> %mask) {
; RV32-LABEL: expandload_v4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB6_5
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    bnez a2, .LBB6_6
; RV32-NEXT:  .LBB6_2: # %else2
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    bnez a2, .LBB6_7
; RV32-NEXT:  .LBB6_3: # %else6
; RV32-NEXT:    andi a1, a1, 8
; RV32-NEXT:    bnez a1, .LBB6_8
; RV32-NEXT:  .LBB6_4: # %else10
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB6_5: # %cond.load
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
; RV32-NEXT:    vfmv.s.f v8, fa5
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    beqz a2, .LBB6_2
; RV32-NEXT:  .LBB6_6: # %cond.load1
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    beqz a2, .LBB6_3
; RV32-NEXT:  .LBB6_7: # %cond.load5
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vslideup.vi v8, v9, 2
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    andi a1, a1, 8
; RV32-NEXT:    beqz a1, .LBB6_4
; RV32-NEXT:  .LBB6_8: # %cond.load9
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vslideup.vi v8, v9, 3
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB6_5
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    bnez a2, .LBB6_6
; RV64-NEXT:  .LBB6_2: # %else2
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    bnez a2, .LBB6_7
; RV64-NEXT:  .LBB6_3: # %else6
; RV64-NEXT:    andi a1, a1, 8
; RV64-NEXT:    bnez a1, .LBB6_8
; RV64-NEXT:  .LBB6_4: # %else10
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB6_5: # %cond.load
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
; RV64-NEXT:    vfmv.s.f v8, fa5
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    beqz a2, .LBB6_2
; RV64-NEXT:  .LBB6_6: # %cond.load1
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    beqz a2, .LBB6_3
; RV64-NEXT:  .LBB6_7: # %cond.load5
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vslideup.vi v8, v9, 2
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    andi a1, a1, 8
; RV64-NEXT:    beqz a1, .LBB6_4
; RV64-NEXT:  .LBB6_8: # %cond.load9
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vslideup.vi v8, v9, 3
; RV64-NEXT:    ret
  %res = call <4 x float> @llvm.masked.expandload.v4f32(ptr align 4 %base, <4 x i1> %mask, <4 x float> %src0)
  ret <4 x float> %res
}

declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>)
define <8 x float> @expandload_v8f32(ptr %base, <8 x float> %src0, <8 x i1> %mask) {
; RV32-LABEL: expandload_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB7_9
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    bnez a2, .LBB7_10
; RV32-NEXT:  .LBB7_2: # %else2
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    bnez a2, .LBB7_11
; RV32-NEXT:  .LBB7_3: # %else6
; RV32-NEXT:    andi a2, a1, 8
; RV32-NEXT:    bnez a2, .LBB7_12
; RV32-NEXT:  .LBB7_4: # %else10
; RV32-NEXT:    andi a2, a1, 16
; RV32-NEXT:    bnez a2, .LBB7_13
; RV32-NEXT:  .LBB7_5: # %else14
; RV32-NEXT:    andi a2, a1, 32
; RV32-NEXT:    bnez a2, .LBB7_14
; RV32-NEXT:  .LBB7_6: # %else18
; RV32-NEXT:    andi a2, a1, 64
; RV32-NEXT:    bnez a2, .LBB7_15
; RV32-NEXT:  .LBB7_7: # %else22
; RV32-NEXT:    andi a1, a1, -128
; RV32-NEXT:    bnez a1, .LBB7_16
; RV32-NEXT:  .LBB7_8: # %else26
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB7_9: # %cond.load
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
; RV32-NEXT:    vfmv.s.f v8, fa5
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    beqz a2, .LBB7_2
; RV32-NEXT:  .LBB7_10: # %cond.load1
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vfmv.s.f v10, fa5
; RV32-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV32-NEXT:    vslideup.vi v8, v10, 1
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    beqz a2, .LBB7_3
; RV32-NEXT:  .LBB7_11: # %cond.load5
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; RV32-NEXT:    vfmv.s.f v10, fa5
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    andi a2, a1, 8
; RV32-NEXT:    beqz a2, .LBB7_4
; RV32-NEXT:  .LBB7_12: # %cond.load9
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; RV32-NEXT:    vfmv.s.f v10, fa5
; RV32-NEXT:    vslideup.vi v8, v10, 3
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    andi a2, a1, 16
; RV32-NEXT:    beqz a2, .LBB7_5
; RV32-NEXT:  .LBB7_13: # %cond.load13
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; RV32-NEXT:    vfmv.s.f v10, fa5
; RV32-NEXT:    vslideup.vi v8, v10, 4
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    andi a2, a1, 32
; RV32-NEXT:    beqz a2, .LBB7_6
; RV32-NEXT:  .LBB7_14: # %cond.load17
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
; RV32-NEXT:    vfmv.s.f v10, fa5
; RV32-NEXT:    vslideup.vi v8, v10, 5
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    andi a2, a1, 64
; RV32-NEXT:    beqz a2, .LBB7_7
; RV32-NEXT:  .LBB7_15: # %cond.load21
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
; RV32-NEXT:    vfmv.s.f v10, fa5
; RV32-NEXT:    vslideup.vi v8, v10, 6
; RV32-NEXT:    addi a0, a0, 4
; RV32-NEXT:    andi a1, a1, -128
; RV32-NEXT:    beqz a1, .LBB7_8
; RV32-NEXT:  .LBB7_16: # %cond.load25
; RV32-NEXT:    flw fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vfmv.s.f v10, fa5
; RV32-NEXT:    vslideup.vi v8, v10, 7
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB7_9
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    bnez a2, .LBB7_10
; RV64-NEXT:  .LBB7_2: # %else2
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    bnez a2, .LBB7_11
; RV64-NEXT:  .LBB7_3: # %else6
; RV64-NEXT:    andi a2, a1, 8
; RV64-NEXT:    bnez a2, .LBB7_12
; RV64-NEXT:  .LBB7_4: # %else10
; RV64-NEXT:    andi a2, a1, 16
; RV64-NEXT:    bnez a2, .LBB7_13
; RV64-NEXT:  .LBB7_5: # %else14
; RV64-NEXT:    andi a2, a1, 32
; RV64-NEXT:    bnez a2, .LBB7_14
; RV64-NEXT:  .LBB7_6: # %else18
; RV64-NEXT:    andi a2, a1, 64
; RV64-NEXT:    bnez a2, .LBB7_15
; RV64-NEXT:  .LBB7_7: # %else22
; RV64-NEXT:    andi a1, a1, -128
; RV64-NEXT:    bnez a1, .LBB7_16
; RV64-NEXT:  .LBB7_8: # %else26
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB7_9: # %cond.load
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e32, m4, tu, ma
; RV64-NEXT:    vfmv.s.f v8, fa5
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    beqz a2, .LBB7_2
; RV64-NEXT:  .LBB7_10: # %cond.load1
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vfmv.s.f v10, fa5
; RV64-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
; RV64-NEXT:    vslideup.vi v8, v10, 1
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    beqz a2, .LBB7_3
; RV64-NEXT:  .LBB7_11: # %cond.load5
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; RV64-NEXT:    vfmv.s.f v10, fa5
; RV64-NEXT:    vslideup.vi v8, v10, 2
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    andi a2, a1, 8
; RV64-NEXT:    beqz a2, .LBB7_4
; RV64-NEXT:  .LBB7_12: # %cond.load9
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 4, e32, m1, tu, ma
; RV64-NEXT:    vfmv.s.f v10, fa5
; RV64-NEXT:    vslideup.vi v8, v10, 3
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    andi a2, a1, 16
; RV64-NEXT:    beqz a2, .LBB7_5
; RV64-NEXT:  .LBB7_13: # %cond.load13
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 5, e32, m2, tu, ma
; RV64-NEXT:    vfmv.s.f v10, fa5
; RV64-NEXT:    vslideup.vi v8, v10, 4
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    andi a2, a1, 32
; RV64-NEXT:    beqz a2, .LBB7_6
; RV64-NEXT:  .LBB7_14: # %cond.load17
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
; RV64-NEXT:    vfmv.s.f v10, fa5
; RV64-NEXT:    vslideup.vi v8, v10, 5
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    andi a2, a1, 64
; RV64-NEXT:    beqz a2, .LBB7_7
; RV64-NEXT:  .LBB7_15: # %cond.load21
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 7, e32, m2, tu, ma
; RV64-NEXT:    vfmv.s.f v10, fa5
; RV64-NEXT:    vslideup.vi v8, v10, 6
; RV64-NEXT:    addi a0, a0, 4
; RV64-NEXT:    andi a1, a1, -128
; RV64-NEXT:    beqz a1, .LBB7_8
; RV64-NEXT:  .LBB7_16: # %cond.load25
; RV64-NEXT:    flw fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vfmv.s.f v10, fa5
; RV64-NEXT:    vslideup.vi v8, v10, 7
; RV64-NEXT:    ret
  %res = call <8 x float> @llvm.masked.expandload.v8f32(ptr align 4 %base, <8 x i1> %mask, <8 x float> %src0)
  ret <8 x float> %res
}

declare <1 x double> @llvm.masked.expandload.v1f64(ptr, <1 x i1>, <1 x double>)
define <1 x double> @expandload_v1f64(ptr %base, <1 x double> %src0, <1 x i1> %mask) {
; RV32-LABEL: expandload_v1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; RV32-NEXT:    vfirst.m a1, v0
; RV32-NEXT:    bnez a1, .LBB8_2
; RV32-NEXT:  # %bb.1: # %cond.load
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:  .LBB8_2: # %else
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; RV64-NEXT:    vfirst.m a1, v0
; RV64-NEXT:    bnez a1, .LBB8_2
; RV64-NEXT:  # %bb.1: # %cond.load
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:  .LBB8_2: # %else
; RV64-NEXT:    ret
  %res = call <1 x double> @llvm.masked.expandload.v1f64(ptr align 8 %base, <1 x i1> %mask, <1 x double> %src0)
  ret <1 x double> %res
}

declare <2 x double> @llvm.masked.expandload.v2f64(ptr, <2 x i1>, <2 x double>)
define <2 x double> @expandload_v2f64(ptr %base, <2 x double> %src0, <2 x i1> %mask) {
; RV32-LABEL: expandload_v2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB9_3
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a1, a1, 2
; RV32-NEXT:    bnez a1, .LBB9_4
; RV32-NEXT:  .LBB9_2: # %else2
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB9_3: # %cond.load
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV32-NEXT:    vfmv.s.f v8, fa5
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a1, a1, 2
; RV32-NEXT:    beqz a1, .LBB9_2
; RV32-NEXT:  .LBB9_4: # %cond.load1
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vfmv.s.f v9, fa5
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vslideup.vi v8, v9, 1
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB9_3
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a1, a1, 2
; RV64-NEXT:    bnez a1, .LBB9_4
; RV64-NEXT:  .LBB9_2: # %else2
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB9_3: # %cond.load
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV64-NEXT:    vfmv.s.f v8, fa5
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a1, a1, 2
; RV64-NEXT:    beqz a1, .LBB9_2
; RV64-NEXT:  .LBB9_4: # %cond.load1
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vfmv.s.f v9, fa5
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vslideup.vi v8, v9, 1
; RV64-NEXT:    ret
  %res = call <2 x double> @llvm.masked.expandload.v2f64(ptr align 8 %base, <2 x i1> %mask, <2 x double> %src0)
  ret <2 x double> %res
}

declare <4 x double> @llvm.masked.expandload.v4f64(ptr, <4 x i1>, <4 x double>)
define <4 x double> @expandload_v4f64(ptr %base, <4 x double> %src0, <4 x i1> %mask) {
; RV32-LABEL: expandload_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB10_5
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    bnez a2, .LBB10_6
; RV32-NEXT:  .LBB10_2: # %else2
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    bnez a2, .LBB10_7
; RV32-NEXT:  .LBB10_3: # %else6
; RV32-NEXT:    andi a1, a1, 8
; RV32-NEXT:    bnez a1, .LBB10_8
; RV32-NEXT:  .LBB10_4: # %else10
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB10_5: # %cond.load
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV32-NEXT:    vfmv.s.f v8, fa5
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    beqz a2, .LBB10_2
; RV32-NEXT:  .LBB10_6: # %cond.load1
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vfmv.s.f v10, fa5
; RV32-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV32-NEXT:    vslideup.vi v8, v10, 1
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    beqz a2, .LBB10_3
; RV32-NEXT:  .LBB10_7: # %cond.load5
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
; RV32-NEXT:    vfmv.s.f v10, fa5
; RV32-NEXT:    vslideup.vi v8, v10, 2
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a1, a1, 8
; RV32-NEXT:    beqz a1, .LBB10_4
; RV32-NEXT:  .LBB10_8: # %cond.load9
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vfmv.s.f v10, fa5
; RV32-NEXT:    vslideup.vi v8, v10, 3
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB10_5
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    bnez a2, .LBB10_6
; RV64-NEXT:  .LBB10_2: # %else2
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    bnez a2, .LBB10_7
; RV64-NEXT:  .LBB10_3: # %else6
; RV64-NEXT:    andi a1, a1, 8
; RV64-NEXT:    bnez a1, .LBB10_8
; RV64-NEXT:  .LBB10_4: # %else10
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB10_5: # %cond.load
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV64-NEXT:    vfmv.s.f v8, fa5
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    beqz a2, .LBB10_2
; RV64-NEXT:  .LBB10_6: # %cond.load1
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vfmv.s.f v10, fa5
; RV64-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV64-NEXT:    vslideup.vi v8, v10, 1
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    beqz a2, .LBB10_3
; RV64-NEXT:  .LBB10_7: # %cond.load5
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
; RV64-NEXT:    vfmv.s.f v10, fa5
; RV64-NEXT:    vslideup.vi v8, v10, 2
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a1, a1, 8
; RV64-NEXT:    beqz a1, .LBB10_4
; RV64-NEXT:  .LBB10_8: # %cond.load9
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vfmv.s.f v10, fa5
; RV64-NEXT:    vslideup.vi v8, v10, 3
; RV64-NEXT:    ret
  %res = call <4 x double> @llvm.masked.expandload.v4f64(ptr align 8 %base, <4 x i1> %mask, <4 x double> %src0)
  ret <4 x double> %res
}

declare <8 x double> @llvm.masked.expandload.v8f64(ptr, <8 x i1>, <8 x double>)
define <8 x double> @expandload_v8f64(ptr %base, <8 x double> %src0, <8 x i1> %mask) {
; RV32-LABEL: expandload_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    andi a2, a1, 1
; RV32-NEXT:    bnez a2, .LBB11_9
; RV32-NEXT:  # %bb.1: # %else
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    bnez a2, .LBB11_10
; RV32-NEXT:  .LBB11_2: # %else2
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    bnez a2, .LBB11_11
; RV32-NEXT:  .LBB11_3: # %else6
; RV32-NEXT:    andi a2, a1, 8
; RV32-NEXT:    bnez a2, .LBB11_12
; RV32-NEXT:  .LBB11_4: # %else10
; RV32-NEXT:    andi a2, a1, 16
; RV32-NEXT:    bnez a2, .LBB11_13
; RV32-NEXT:  .LBB11_5: # %else14
; RV32-NEXT:    andi a2, a1, 32
; RV32-NEXT:    bnez a2, .LBB11_14
; RV32-NEXT:  .LBB11_6: # %else18
; RV32-NEXT:    andi a2, a1, 64
; RV32-NEXT:    bnez a2, .LBB11_15
; RV32-NEXT:  .LBB11_7: # %else22
; RV32-NEXT:    andi a1, a1, -128
; RV32-NEXT:    bnez a1, .LBB11_16
; RV32-NEXT:  .LBB11_8: # %else26
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB11_9: # %cond.load
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV32-NEXT:    vfmv.s.f v8, fa5
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 2
; RV32-NEXT:    beqz a2, .LBB11_2
; RV32-NEXT:  .LBB11_10: # %cond.load1
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vfmv.s.f v12, fa5
; RV32-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV32-NEXT:    vslideup.vi v8, v12, 1
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 4
; RV32-NEXT:    beqz a2, .LBB11_3
; RV32-NEXT:  .LBB11_11: # %cond.load5
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
; RV32-NEXT:    vfmv.s.f v12, fa5
; RV32-NEXT:    vslideup.vi v8, v12, 2
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 8
; RV32-NEXT:    beqz a2, .LBB11_4
; RV32-NEXT:  .LBB11_12: # %cond.load9
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
; RV32-NEXT:    vfmv.s.f v12, fa5
; RV32-NEXT:    vslideup.vi v8, v12, 3
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 16
; RV32-NEXT:    beqz a2, .LBB11_5
; RV32-NEXT:  .LBB11_13: # %cond.load13
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
; RV32-NEXT:    vfmv.s.f v12, fa5
; RV32-NEXT:    vslideup.vi v8, v12, 4
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 32
; RV32-NEXT:    beqz a2, .LBB11_6
; RV32-NEXT:  .LBB11_14: # %cond.load17
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 6, e64, m4, tu, ma
; RV32-NEXT:    vfmv.s.f v12, fa5
; RV32-NEXT:    vslideup.vi v8, v12, 5
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a2, a1, 64
; RV32-NEXT:    beqz a2, .LBB11_7
; RV32-NEXT:  .LBB11_15: # %cond.load21
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 7, e64, m4, tu, ma
; RV32-NEXT:    vfmv.s.f v12, fa5
; RV32-NEXT:    vslideup.vi v8, v12, 6
; RV32-NEXT:    addi a0, a0, 8
; RV32-NEXT:    andi a1, a1, -128
; RV32-NEXT:    beqz a1, .LBB11_8
; RV32-NEXT:  .LBB11_16: # %cond.load25
; RV32-NEXT:    fld fa5, 0(a0)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vfmv.s.f v12, fa5
; RV32-NEXT:    vslideup.vi v8, v12, 7
; RV32-NEXT:    ret
;
; RV64-LABEL: expandload_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a1, v0
; RV64-NEXT:    andi a2, a1, 1
; RV64-NEXT:    bnez a2, .LBB11_9
; RV64-NEXT:  # %bb.1: # %else
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    bnez a2, .LBB11_10
; RV64-NEXT:  .LBB11_2: # %else2
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    bnez a2, .LBB11_11
; RV64-NEXT:  .LBB11_3: # %else6
; RV64-NEXT:    andi a2, a1, 8
; RV64-NEXT:    bnez a2, .LBB11_12
; RV64-NEXT:  .LBB11_4: # %else10
; RV64-NEXT:    andi a2, a1, 16
; RV64-NEXT:    bnez a2, .LBB11_13
; RV64-NEXT:  .LBB11_5: # %else14
; RV64-NEXT:    andi a2, a1, 32
; RV64-NEXT:    bnez a2, .LBB11_14
; RV64-NEXT:  .LBB11_6: # %else18
; RV64-NEXT:    andi a2, a1, 64
; RV64-NEXT:    bnez a2, .LBB11_15
; RV64-NEXT:  .LBB11_7: # %else22
; RV64-NEXT:    andi a1, a1, -128
; RV64-NEXT:    bnez a1, .LBB11_16
; RV64-NEXT:  .LBB11_8: # %else26
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB11_9: # %cond.load
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, tu, ma
; RV64-NEXT:    vfmv.s.f v8, fa5
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 2
; RV64-NEXT:    beqz a2, .LBB11_2
; RV64-NEXT:  .LBB11_10: # %cond.load1
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vfmv.s.f v12, fa5
; RV64-NEXT:    vsetivli zero, 2, e64, m1, tu, ma
; RV64-NEXT:    vslideup.vi v8, v12, 1
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 4
; RV64-NEXT:    beqz a2, .LBB11_3
; RV64-NEXT:  .LBB11_11: # %cond.load5
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
; RV64-NEXT:    vfmv.s.f v12, fa5
; RV64-NEXT:    vslideup.vi v8, v12, 2
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 8
; RV64-NEXT:    beqz a2, .LBB11_4
; RV64-NEXT:  .LBB11_12: # %cond.load9
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
; RV64-NEXT:    vfmv.s.f v12, fa5
; RV64-NEXT:    vslideup.vi v8, v12, 3
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 16
; RV64-NEXT:    beqz a2, .LBB11_5
; RV64-NEXT:  .LBB11_13: # %cond.load13
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT:    vfmv.s.f v12, fa5
; RV64-NEXT:    vslideup.vi v8, v12, 4
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 32
; RV64-NEXT:    beqz a2, .LBB11_6
; RV64-NEXT:  .LBB11_14: # %cond.load17
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 6, e64, m4, tu, ma
; RV64-NEXT:    vfmv.s.f v12, fa5
; RV64-NEXT:    vslideup.vi v8, v12, 5
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a2, a1, 64
; RV64-NEXT:    beqz a2, .LBB11_7
; RV64-NEXT:  .LBB11_15: # %cond.load21
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 7, e64, m4, tu, ma
; RV64-NEXT:    vfmv.s.f v12, fa5
; RV64-NEXT:    vslideup.vi v8, v12, 6
; RV64-NEXT:    addi a0, a0, 8
; RV64-NEXT:    andi a1, a1, -128
; RV64-NEXT:    beqz a1, .LBB11_8
; RV64-NEXT:  .LBB11_16: # %cond.load25
; RV64-NEXT:    fld fa5, 0(a0)
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vfmv.s.f v12, fa5
; RV64-NEXT:    vslideup.vi v8, v12, 7
; RV64-NEXT:    ret
  %res = call <8 x double> @llvm.masked.expandload.v8f64(ptr align 8 %base, <8 x i1> %mask, <8 x double> %src0)
  ret <8 x double> %res
}