; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

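; Each fixed-length element type below is tested in vector-vector form and,
; for most types, vector-scalar (splatted scalar) form, each with a masked
; variant and an unmasked (all-ones mask) variant of llvm.vp.fmuladd.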
declare <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32)

define <2 x half> @vfma_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vf_v2f16(<2 x half> %va, half %b, <2 x half> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x half> poison, half %b, i32 0
  %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x half> poison, half %b, i32 0
  %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.fmuladd.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32)

define <4 x half> @vfma_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vf_v4f16(<4 x half> %va, half %b, <4 x half> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x half> poison, half %b, i32 0
  %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x half> poison, half %b, i32 0
  %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.fmuladd.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32)

define <8 x half> @vfma_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, <8 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vf_v8f16(<8 x half> %va, half %b, <8 x half> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x half> poison, half %b, i32 0
  %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x half> poison, half %b, i32 0
  %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.fmuladd.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

declare <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32)

define <16 x half> @vfma_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16 x half> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vf_v16f16(<16 x half> %va, half %b, <16 x half> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x half> poison, half %b, i32 0
  %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f16_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x half> poison, half %b, i32 0
  %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.fmuladd.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

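; The same masked/unmasked, vector-vector and vector-scalar patterns repeat
; below for single-precision (e32) and double-precision (e64) elements at the
; corresponding LMULs.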
declare <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32)

define <2 x float> @vfma_vv_v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, <2 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vf_v2f32(<2 x float> %va, float %b, <2 x float> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x float> poison, float %b, i32 0
  %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x float> poison, float %b, i32 0
  %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.fmuladd.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)

define <4 x float> @vfma_vv_v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, <4 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vf_v4f32(<4 x float> %va, float %b, <4 x float> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x float> poison, float %b, i32 0
  %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x float> poison, float %b, i32 0
  %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32)

define <8 x float> @vfma_vv_v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, <8 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vf_v8f32(<8 x float> %va, float %b, <8 x float> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x float> poison, float %b, i32 0
  %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x float> poison, float %b, i32 0
  %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.fmuladd.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

declare <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32)

define <16 x float> @vfma_vv_v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b, <16 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vf_v16f32(<16 x float> %va, float %b, <16 x float> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x float> poison, float %b, i32 0
  %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x float> poison, float %b, i32 0
  %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

declare <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32)

define <2 x double> @vfma_vv_v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT:    vmv.v.v v8, v9
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b, <2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vf_v2f64(<2 x double> %va, double %b, <2 x double> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x double> poison, double %b, i32 0
  %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <2 x double> poison, double %b, i32 0
  %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.fmuladd.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32)

define <4 x double> @vfma_vv_v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, <4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vf_v4f64(<4 x double> %va, double %b, <4 x double> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x double> poison, double %b, i32 0
  %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <4 x double> poison, double %b, i32 0
  %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.fmuladd.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32)

define <8 x double> @vfma_vv_v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, <8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vf_v8f64(<8 x double> %va, double %b, <8 x double> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x double> poison, double %b, i32 0
  %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <8 x double> poison, double %b, i32 0
  %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.fmuladd.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

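; For the <15 x double> and <16 x double> vector-vector tests, the three m8
; operands no longer all fit in vector argument registers, so the addend is
; passed indirectly and loaded with vle64.v (VL=16) before the EVL-controlled
; vfmadd.vv.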
declare <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double>, <15 x double>, <15 x double>, <15 x i1>, i32)

define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl)
  ret <15 x double> %v
}

define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24
; CHECK-NEXT:    ret
  %v = call <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x double> %v
}

declare <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double>, <16 x double>, <16 x double>, <16 x i1>, i32)

define <16 x double> @vfma_vv_v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %b, <16 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a0)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24
; CHECK-NEXT:    ret
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vf_v16f64(<16 x double> %va, double %b, <16 x double> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16, v0.t
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vf_v16f64_unmasked(<16 x double> %va, double %b, <16 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

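; The <32 x double> tests are split into two 16-element halves: operands that
; do not fit in v8/v16 are loaded from memory, values are spilled and reloaded
; around the two vfmadd.vv operations, and the EVL is clamped to 16 for the
; first half and to a zero-saturated evl-16 for the second half.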
declare <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double>, <32 x double>, <32 x double>, <32 x i1>, i32)

define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 5
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a2, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a2)
; CHECK-NEXT:    addi a2, a0, 128
; CHECK-NEXT:    vle64.v v8, (a1)
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v8, (a2)
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a4
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v7, v0, 2
; CHECK-NEXT:    bltu a4, a1, .LBB50_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB50_2:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, a4, -16
; CHECK-NEXT:    sltu a1, a4, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    vmv1r.v v0, v7
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a2, 24
; CHECK-NEXT:    mul a1, a1, a2
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT:    vmv.v.v v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 5
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %b, <32 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    li a3, 24
; CHECK-NEXT:    mul a1, a1, a3
; CHECK-NEXT:    sub sp, sp, a1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a1, a2, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a2)
; CHECK-NEXT:    addi a2, a0, 128
; CHECK-NEXT:    vle64.v v8, (a1)
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vle64.v v24, (a2)
; CHECK-NEXT:    vle64.v v0, (a0)
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    mv a0, a4
; CHECK-NEXT:    bltu a4, a1, .LBB51_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a0, 16
; CHECK-NEXT:  .LBB51_2:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v0, v8, v16
; CHECK-NEXT:    addi a0, a4, -16
; CHECK-NEXT:    sltu a1, a4, a0
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 4
; CHECK-NEXT:    add a1, sp, a1
; CHECK-NEXT:    addi a1, a1, 16
; CHECK-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    addi a1, sp, 16
; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v24, v16, v8
; CHECK-NEXT:    vmv8r.v v8, v0
; CHECK-NEXT:    vmv.v.v v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    li a1, 24
; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %v = call <32 x double> @llvm.vp.fmuladd.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}