; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
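; Masked loads of fixed-length FP vectors (<1 x half> through <128 x half>, plus
; float and double variants) where the mask comes from an fcmp oeq against zero,
; checked on both RV32 and RV64.
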
define void @masked_load_v1f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    fmv.h.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <1 x half>, ptr %m_ptr
  %mask = fcmp oeq <1 x half> %m, zeroinitializer
  %load = call <1 x half> @llvm.masked.load.v1f16(ptr %a, i32 8, <1 x i1> %mask, <1 x half> undef)
  store <1 x half> %load, ptr %res_ptr
  ret void
}
declare <1 x half> @llvm.masked.load.v1f16(ptr, i32, <1 x i1>, <1 x half>)

define void @masked_load_v1f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    fmv.w.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <1 x float>, ptr %m_ptr
  %mask = fcmp oeq <1 x float> %m, zeroinitializer
  %load = call <1 x float> @llvm.masked.load.v1f32(ptr %a, i32 8, <1 x i1> %mask, <1 x float> undef)
  store <1 x float> %load, ptr %res_ptr
  ret void
}
declare <1 x float> @llvm.masked.load.v1f32(ptr, i32, <1 x i1>, <1 x float>)

define void @masked_load_v1f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; RV32-LABEL: masked_load_v1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    fcvt.d.w fa5, zero
; RV32-NEXT:    vmfeq.vf v0, v8, fa5
; RV32-NEXT:    vle64.v v8, (a0), v0.t
; RV32-NEXT:    vse64.v v8, (a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: masked_load_v1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    fmv.d.x fa5, zero
; RV64-NEXT:    vmfeq.vf v0, v8, fa5
; RV64-NEXT:    vle64.v v8, (a0), v0.t
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ret
  %m = load <1 x double>, ptr %m_ptr
  %mask = fcmp oeq <1 x double> %m, zeroinitializer
  %load = call <1 x double> @llvm.masked.load.v1f64(ptr %a, i32 8, <1 x i1> %mask, <1 x double> undef)
  store <1 x double> %load, ptr %res_ptr
  ret void
}
declare <1 x double> @llvm.masked.load.v1f64(ptr, i32, <1 x i1>, <1 x double>)

define void @masked_load_v2f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    fmv.h.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <2 x half>, ptr %m_ptr
  %mask = fcmp oeq <2 x half> %m, zeroinitializer
  %load = call <2 x half> @llvm.masked.load.v2f16(ptr %a, i32 8, <2 x i1> %mask, <2 x half> undef)
  store <2 x half> %load, ptr %res_ptr
  ret void
}
declare <2 x half> @llvm.masked.load.v2f16(ptr, i32, <2 x i1>, <2 x half>)

define void @masked_load_v2f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    fmv.w.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <2 x float>, ptr %m_ptr
  %mask = fcmp oeq <2 x float> %m, zeroinitializer
  %load = call <2 x float> @llvm.masked.load.v2f32(ptr %a, i32 8, <2 x i1> %mask, <2 x float> undef)
  store <2 x float> %load, ptr %res_ptr
  ret void
}
declare <2 x float> @llvm.masked.load.v2f32(ptr, i32, <2 x i1>, <2 x float>)

define void @masked_load_v2f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; RV32-LABEL: masked_load_v2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    fcvt.d.w fa5, zero
; RV32-NEXT:    vmfeq.vf v0, v8, fa5
; RV32-NEXT:    vle64.v v8, (a0), v0.t
; RV32-NEXT:    vse64.v v8, (a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: masked_load_v2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    fmv.d.x fa5, zero
; RV64-NEXT:    vmfeq.vf v0, v8, fa5
; RV64-NEXT:    vle64.v v8, (a0), v0.t
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ret
  %m = load <2 x double>, ptr %m_ptr
  %mask = fcmp oeq <2 x double> %m, zeroinitializer
  %load = call <2 x double> @llvm.masked.load.v2f64(ptr %a, i32 8, <2 x i1> %mask, <2 x double> undef)
  store <2 x double> %load, ptr %res_ptr
  ret void
}
declare <2 x double> @llvm.masked.load.v2f64(ptr, i32, <2 x i1>, <2 x double>)

define void @masked_load_v4f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    fmv.h.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <4 x half>, ptr %m_ptr
  %mask = fcmp oeq <4 x half> %m, zeroinitializer
  %load = call <4 x half> @llvm.masked.load.v4f16(ptr %a, i32 8, <4 x i1> %mask, <4 x half> undef)
  store <4 x half> %load, ptr %res_ptr
  ret void
}
declare <4 x half> @llvm.masked.load.v4f16(ptr, i32, <4 x i1>, <4 x half>)

define void @masked_load_v4f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    fmv.w.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <4 x float>, ptr %m_ptr
  %mask = fcmp oeq <4 x float> %m, zeroinitializer
  %load = call <4 x float> @llvm.masked.load.v4f32(ptr %a, i32 8, <4 x i1> %mask, <4 x float> undef)
  store <4 x float> %load, ptr %res_ptr
  ret void
}
declare <4 x float> @llvm.masked.load.v4f32(ptr, i32, <4 x i1>, <4 x float>)

define void @masked_load_v4f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; RV32-LABEL: masked_load_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    fcvt.d.w fa5, zero
; RV32-NEXT:    vmfeq.vf v0, v8, fa5
; RV32-NEXT:    vle64.v v8, (a0), v0.t
; RV32-NEXT:    vse64.v v8, (a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: masked_load_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    fmv.d.x fa5, zero
; RV64-NEXT:    vmfeq.vf v0, v8, fa5
; RV64-NEXT:    vle64.v v8, (a0), v0.t
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ret
  %m = load <4 x double>, ptr %m_ptr
  %mask = fcmp oeq <4 x double> %m, zeroinitializer
  %load = call <4 x double> @llvm.masked.load.v4f64(ptr %a, i32 8, <4 x i1> %mask, <4 x double> undef)
  store <4 x double> %load, ptr %res_ptr
  ret void
}
declare <4 x double> @llvm.masked.load.v4f64(ptr, i32, <4 x i1>, <4 x double>)

define void @masked_load_v8f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    fmv.h.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <8 x half>, ptr %m_ptr
  %mask = fcmp oeq <8 x half> %m, zeroinitializer
  %load = call <8 x half> @llvm.masked.load.v8f16(ptr %a, i32 8, <8 x i1> %mask, <8 x half> undef)
  store <8 x half> %load, ptr %res_ptr
  ret void
}
declare <8 x half> @llvm.masked.load.v8f16(ptr, i32, <8 x i1>, <8 x half>)

define void @masked_load_v8f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    fmv.w.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <8 x float>, ptr %m_ptr
  %mask = fcmp oeq <8 x float> %m, zeroinitializer
  %load = call <8 x float> @llvm.masked.load.v8f32(ptr %a, i32 8, <8 x i1> %mask, <8 x float> undef)
  store <8 x float> %load, ptr %res_ptr
  ret void
}
declare <8 x float> @llvm.masked.load.v8f32(ptr, i32, <8 x i1>, <8 x float>)

define void @masked_load_v8f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; RV32-LABEL: masked_load_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    fcvt.d.w fa5, zero
; RV32-NEXT:    vmfeq.vf v0, v8, fa5
; RV32-NEXT:    vle64.v v8, (a0), v0.t
; RV32-NEXT:    vse64.v v8, (a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: masked_load_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    fmv.d.x fa5, zero
; RV64-NEXT:    vmfeq.vf v0, v8, fa5
; RV64-NEXT:    vle64.v v8, (a0), v0.t
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ret
  %m = load <8 x double>, ptr %m_ptr
  %mask = fcmp oeq <8 x double> %m, zeroinitializer
  %load = call <8 x double> @llvm.masked.load.v8f64(ptr %a, i32 8, <8 x i1> %mask, <8 x double> undef)
  store <8 x double> %load, ptr %res_ptr
  ret void
}
declare <8 x double> @llvm.masked.load.v8f64(ptr, i32, <8 x i1>, <8 x double>)

define void @masked_load_v16f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    fmv.h.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <16 x half>, ptr %m_ptr
  %mask = fcmp oeq <16 x half> %m, zeroinitializer
  %load = call <16 x half> @llvm.masked.load.v16f16(ptr %a, i32 8, <16 x i1> %mask, <16 x half> undef)
  store <16 x half> %load, ptr %res_ptr
  ret void
}
declare <16 x half> @llvm.masked.load.v16f16(ptr, i32, <16 x i1>, <16 x half>)

define void @masked_load_v16f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    fmv.w.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <16 x float>, ptr %m_ptr
  %mask = fcmp oeq <16 x float> %m, zeroinitializer
  %load = call <16 x float> @llvm.masked.load.v16f32(ptr %a, i32 8, <16 x i1> %mask, <16 x float> undef)
  store <16 x float> %load, ptr %res_ptr
  ret void
}
declare <16 x float> @llvm.masked.load.v16f32(ptr, i32, <16 x i1>, <16 x float>)

define void @masked_load_v16f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; RV32-LABEL: masked_load_v16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vle64.v v8, (a1)
; RV32-NEXT:    fcvt.d.w fa5, zero
; RV32-NEXT:    vmfeq.vf v0, v8, fa5
; RV32-NEXT:    vle64.v v8, (a0), v0.t
; RV32-NEXT:    vse64.v v8, (a2)
; RV32-NEXT:    ret
;
; RV64-LABEL: masked_load_v16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    fmv.d.x fa5, zero
; RV64-NEXT:    vmfeq.vf v0, v8, fa5
; RV64-NEXT:    vle64.v v8, (a0), v0.t
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ret
  %m = load <16 x double>, ptr %m_ptr
  %mask = fcmp oeq <16 x double> %m, zeroinitializer
  %load = call <16 x double> @llvm.masked.load.v16f64(ptr %a, i32 8, <16 x i1> %mask, <16 x double> undef)
  store <16 x double> %load, ptr %res_ptr
  ret void
}
declare <16 x double> @llvm.masked.load.v16f64(ptr, i32, <16 x i1>, <16 x double>)

define void @masked_load_v32f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    fmv.h.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <32 x half>, ptr %m_ptr
  %mask = fcmp oeq <32 x half> %m, zeroinitializer
  %load = call <32 x half> @llvm.masked.load.v32f16(ptr %a, i32 8, <32 x i1> %mask, <32 x half> undef)
  store <32 x half> %load, ptr %res_ptr
  ret void
}
declare <32 x half> @llvm.masked.load.v32f16(ptr, i32, <32 x i1>, <32 x half>)

define void @masked_load_v32f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    fmv.w.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <32 x float>, ptr %m_ptr
  %mask = fcmp oeq <32 x float> %m, zeroinitializer
  %load = call <32 x float> @llvm.masked.load.v32f32(ptr %a, i32 8, <32 x i1> %mask, <32 x float> undef)
  store <32 x float> %load, ptr %res_ptr
  ret void
}
declare <32 x float> @llvm.masked.load.v32f32(ptr, i32, <32 x i1>, <32 x float>)

define void @masked_load_v32f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; RV32-LABEL: masked_load_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi a3, a1, 128
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vle64.v v16, (a1)
; RV32-NEXT:    vle64.v v24, (a3)
; RV32-NEXT:    fcvt.d.w fa5, zero
; RV32-NEXT:    vmfeq.vf v8, v16, fa5
; RV32-NEXT:    vmfeq.vf v0, v24, fa5
; RV32-NEXT:    addi a1, a0, 128
; RV32-NEXT:    vle64.v v16, (a1), v0.t
; RV32-NEXT:    vmv1r.v v0, v8
; RV32-NEXT:    vle64.v v8, (a0), v0.t
; RV32-NEXT:    vse64.v v8, (a2)
; RV32-NEXT:    addi a0, a2, 128
; RV32-NEXT:    vse64.v v16, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: masked_load_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi a3, a1, 128
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v16, (a1)
; RV64-NEXT:    vle64.v v24, (a3)
; RV64-NEXT:    fmv.d.x fa5, zero
; RV64-NEXT:    vmfeq.vf v8, v16, fa5
; RV64-NEXT:    vmfeq.vf v0, v24, fa5
; RV64-NEXT:    addi a1, a0, 128
; RV64-NEXT:    vle64.v v16, (a1), v0.t
; RV64-NEXT:    vmv1r.v v0, v8
; RV64-NEXT:    vle64.v v8, (a0), v0.t
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    addi a0, a2, 128
; RV64-NEXT:    vse64.v v16, (a0)
; RV64-NEXT:    ret
  %m = load <32 x double>, ptr %m_ptr
  %mask = fcmp oeq <32 x double> %m, zeroinitializer
  %load = call <32 x double> @llvm.masked.load.v32f64(ptr %a, i32 8, <32 x i1> %mask, <32 x double> undef)
  store <32 x double> %load, ptr %res_ptr
  ret void
}
declare <32 x double> @llvm.masked.load.v32f64(ptr, i32, <32 x i1>, <32 x double>)

define void @masked_load_v64f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 64
; CHECK-NEXT:    vsetvli zero, a3, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    fmv.h.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v0, v8, fa5
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    ret
  %m = load <64 x half>, ptr %m_ptr
  %mask = fcmp oeq <64 x half> %m, zeroinitializer
  %load = call <64 x half> @llvm.masked.load.v64f16(ptr %a, i32 8, <64 x i1> %mask, <64 x half> undef)
  store <64 x half> %load, ptr %res_ptr
  ret void
}
declare <64 x half> @llvm.masked.load.v64f16(ptr, i32, <64 x i1>, <64 x half>)

define void @masked_load_v64f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a3, a1, 128
; CHECK-NEXT:    li a4, 32
; CHECK-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v16, (a1)
; CHECK-NEXT:    vle32.v v24, (a3)
; CHECK-NEXT:    fmv.w.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v8, v16, fa5
; CHECK-NEXT:    vmfeq.vf v0, v24, fa5
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    vle32.v v16, (a1), v0.t
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    vse32.v v8, (a2)
; CHECK-NEXT:    addi a0, a2, 128
; CHECK-NEXT:    vse32.v v16, (a0)
; CHECK-NEXT:    ret
  %m = load <64 x float>, ptr %m_ptr
  %mask = fcmp oeq <64 x float> %m, zeroinitializer
  %load = call <64 x float> @llvm.masked.load.v64f32(ptr %a, i32 8, <64 x i1> %mask, <64 x float> undef)
  store <64 x float> %load, ptr %res_ptr
  ret void
}
declare <64 x float> @llvm.masked.load.v64f32(ptr, i32, <64 x i1>, <64 x float>)

define void @masked_load_v128f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind {
; CHECK-LABEL: masked_load_v128f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a3, a1, 128
; CHECK-NEXT:    li a4, 64
; CHECK-NEXT:    vsetvli zero, a4, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v16, (a1)
; CHECK-NEXT:    vle16.v v24, (a3)
; CHECK-NEXT:    fmv.h.x fa5, zero
; CHECK-NEXT:    vmfeq.vf v8, v16, fa5
; CHECK-NEXT:    vmfeq.vf v0, v24, fa5
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    vle16.v v16, (a1), v0.t
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    vse16.v v8, (a2)
; CHECK-NEXT:    addi a0, a2, 128
; CHECK-NEXT:    vse16.v v16, (a0)
; CHECK-NEXT:    ret
  %m = load <128 x half>, ptr %m_ptr
  %mask = fcmp oeq <128 x half> %m, zeroinitializer
  %load = call <128 x half> @llvm.masked.load.v128f16(ptr %a, i32 8, <128 x i1> %mask, <128 x half> undef)
  store <128 x half> %load, ptr %res_ptr
  ret void
}
declare <128 x half> @llvm.masked.load.v128f16(ptr, i32, <128 x i1>, <128 x half>)