1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
4 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
6 ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
8 ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
11 declare <2 x half> @llvm.vp.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32)
; Masked vp.fma on <2 x half> with three vector operands. With Zvfh the
; expected code is a single masked vfmadd.vv at e16 plus a copy of the
; result into v8. With only Zvfhmin the operands are expected to be widened
; to e32 (vfwcvt.f.f.v), combined by a masked vfmadd.vv, and narrowed back
; to e16 (vfncvt.f.f.w).
13 define <2 x half> @vfma_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
14 ; ZVFH-LABEL: vfma_vv_v2f16:
16 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
17 ; ZVFH-NEXT: vfmadd.vv v9, v8, v10, v0.t
18 ; ZVFH-NEXT: vmv1r.v v8, v9
21 ; ZVFHMIN-LABEL: vfma_vv_v2f16:
23 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
24 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
25 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
26 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
27 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
28 ; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11, v0.t
29 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
30 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
32 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
; vp.fma on <2 x half> with three vector operands and an all-true mask.
; The expected vfmadd.vv therefore carries no v0.t mask operand, both in
; the direct Zvfh lowering and in the widen/narrow Zvfhmin lowering.
36 define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x half> %c, i32 zeroext %evl) {
37 ; ZVFH-LABEL: vfma_vv_v2f16_unmasked:
39 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
40 ; ZVFH-NEXT: vfmadd.vv v8, v9, v10
43 ; ZVFHMIN-LABEL: vfma_vv_v2f16_unmasked:
45 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
46 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
47 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
48 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
49 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
50 ; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11
51 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
52 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; Build the all-true mask by splatting i1 true from lane 0.
54 %head = insertelement <2 x i1> poison, i1 true, i32 0
55 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
56 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
; Masked vp.fma on <2 x half> whose second operand is a splat of the scalar
; %b. With Zvfh the splat is expected to fold into vfmadd.vf. With only
; Zvfhmin the scalar is expected to be converted to float (fcvt.s.h),
; splatted (vfmv.v.f) and narrowed to a half vector before the
; widen / masked vfmadd.vv / narrow sequence.
60 define <2 x half> @vfma_vf_v2f16(<2 x half> %va, half %b, <2 x half> %vc, <2 x i1> %m, i32 zeroext %evl) {
61 ; ZVFH-LABEL: vfma_vf_v2f16:
63 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
64 ; ZVFH-NEXT: vfmadd.vf v8, fa0, v9, v0.t
67 ; ZVFHMIN-LABEL: vfma_vf_v2f16:
69 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
70 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
71 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
72 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
73 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
74 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
75 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
76 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
77 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
78 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
79 ; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10, v0.t
80 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
81 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; Splat %b from lane 0 into every lane of %vb.
83 %elt.head = insertelement <2 x half> poison, half %b, i32 0
84 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
85 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl)
; vp.fma on <2 x half> with a splatted scalar second operand and an
; all-true mask. With Zvfh the expected code is an unmasked vfmadd.vf; with
; only Zvfhmin the scalar is expected to be materialised as a half vector
; and the operation performed at e32 by an unmasked vfmadd.vv.
89 define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %vc, i32 zeroext %evl) {
90 ; ZVFH-LABEL: vfma_vf_v2f16_unmasked:
92 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
93 ; ZVFH-NEXT: vfmadd.vf v8, fa0, v9
96 ; ZVFHMIN-LABEL: vfma_vf_v2f16_unmasked:
98 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
99 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
100 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
101 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
102 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
103 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
104 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
105 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
106 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
107 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
108 ; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10
109 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
110 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; Splat %b from lane 0 into every lane of %vb.
112 %elt.head = insertelement <2 x half> poison, half %b, i32 0
113 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
; Build the all-true mask by splatting i1 true from lane 0.
115 %head = insertelement <2 x i1> poison, i1 true, i32 0
116 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
117 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl)
120 declare <4 x half> @llvm.vp.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32)
; Masked vp.fma on <4 x half> with three vector operands. With Zvfh the
; expected code is a single masked vfmadd.vv at e16/mf2 plus a copy of the
; result into v8. With only Zvfhmin the operands are expected to be widened
; to e32/m1, combined by a masked vfmadd.vv, and narrowed back to e16.
122 define <4 x half> @vfma_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
123 ; ZVFH-LABEL: vfma_vv_v4f16:
125 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
126 ; ZVFH-NEXT: vfmadd.vv v9, v8, v10, v0.t
127 ; ZVFH-NEXT: vmv1r.v v8, v9
130 ; ZVFHMIN-LABEL: vfma_vv_v4f16:
132 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
133 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
134 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
135 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
136 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
137 ; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11, v0.t
138 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
139 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
141 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl)
; vp.fma on <4 x half> with three vector operands and an all-true mask.
; The expected vfmadd.vv has no v0.t mask operand in either the direct
; Zvfh lowering or the widen/narrow Zvfhmin lowering.
145 define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x half> %c, i32 zeroext %evl) {
146 ; ZVFH-LABEL: vfma_vv_v4f16_unmasked:
148 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
149 ; ZVFH-NEXT: vfmadd.vv v8, v9, v10
152 ; ZVFHMIN-LABEL: vfma_vv_v4f16_unmasked:
154 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
155 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
156 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
157 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
158 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
159 ; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11
160 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
161 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; Build the all-true mask by splatting i1 true from lane 0.
163 %head = insertelement <4 x i1> poison, i1 true, i32 0
164 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
165 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl)
; Masked vp.fma on <4 x half> whose second operand is a splat of the scalar
; %b. With Zvfh the splat is expected to fold into vfmadd.vf. With only
; Zvfhmin the scalar is expected to be converted to float, splatted and
; narrowed to a half vector before the widen / masked vfmadd.vv / narrow
; sequence.
169 define <4 x half> @vfma_vf_v4f16(<4 x half> %va, half %b, <4 x half> %vc, <4 x i1> %m, i32 zeroext %evl) {
170 ; ZVFH-LABEL: vfma_vf_v4f16:
172 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
173 ; ZVFH-NEXT: vfmadd.vf v8, fa0, v9, v0.t
176 ; ZVFHMIN-LABEL: vfma_vf_v4f16:
178 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
179 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
180 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
181 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
182 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
183 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
184 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
185 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
186 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
187 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
188 ; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10, v0.t
189 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
190 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; Splat %b from lane 0 into every lane of %vb.
192 %elt.head = insertelement <4 x half> poison, half %b, i32 0
193 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
194 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl)
; vp.fma on <4 x half> with a splatted scalar second operand and an
; all-true mask. With Zvfh the expected code is an unmasked vfmadd.vf; with
; only Zvfhmin the scalar is expected to be materialised as a half vector
; and the operation performed at e32 by an unmasked vfmadd.vv.
198 define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %vc, i32 zeroext %evl) {
199 ; ZVFH-LABEL: vfma_vf_v4f16_unmasked:
201 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
202 ; ZVFH-NEXT: vfmadd.vf v8, fa0, v9
205 ; ZVFHMIN-LABEL: vfma_vf_v4f16_unmasked:
207 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
208 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
209 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
210 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
211 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
212 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
213 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
214 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
215 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v11
216 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma
217 ; ZVFHMIN-NEXT: vfmadd.vv v12, v9, v10
218 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
219 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; Splat %b from lane 0 into every lane of %vb.
221 %elt.head = insertelement <4 x half> poison, half %b, i32 0
222 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
; Build the all-true mask by splatting i1 true from lane 0.
224 %head = insertelement <4 x i1> poison, i1 true, i32 0
225 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
226 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl)
229 declare <8 x half> @llvm.vp.fma.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32)
; Masked vp.fma on <8 x half> with three vector operands. With Zvfh the
; expected code is a single masked vfmadd.vv at e16/m1 plus a copy of the
; result into v8. With only Zvfhmin the operands are expected to be widened
; to e32/m2, combined by a masked vfmadd.vv, and narrowed back to e16.
231 define <8 x half> @vfma_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
232 ; ZVFH-LABEL: vfma_vv_v8f16:
234 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
235 ; ZVFH-NEXT: vfmadd.vv v9, v8, v10, v0.t
236 ; ZVFH-NEXT: vmv.v.v v8, v9
239 ; ZVFHMIN-LABEL: vfma_vv_v8f16:
241 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
242 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
243 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
244 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
245 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
246 ; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t
247 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
248 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
250 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl)
; vp.fma on <8 x half> with three vector operands and an all-true mask.
; The expected vfmadd.vv has no v0.t mask operand in either the direct
; Zvfh lowering or the widen/narrow Zvfhmin lowering.
254 define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, <8 x half> %c, i32 zeroext %evl) {
255 ; ZVFH-LABEL: vfma_vv_v8f16_unmasked:
257 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
258 ; ZVFH-NEXT: vfmadd.vv v8, v9, v10
261 ; ZVFHMIN-LABEL: vfma_vv_v8f16_unmasked:
263 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
264 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
265 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
266 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
267 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
268 ; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
269 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
270 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; Build the all-true mask by splatting i1 true from lane 0.
272 %head = insertelement <8 x i1> poison, i1 true, i32 0
273 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
274 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl)
; Masked vp.fma on <8 x half> whose second operand is a splat of the scalar
; %b. With Zvfh the splat is expected to fold into vfmadd.vf. With only
; Zvfhmin the scalar is expected to be converted to float, splatted and
; narrowed to a half vector before the widen / masked vfmadd.vv / narrow
; sequence.
278 define <8 x half> @vfma_vf_v8f16(<8 x half> %va, half %b, <8 x half> %vc, <8 x i1> %m, i32 zeroext %evl) {
279 ; ZVFH-LABEL: vfma_vf_v8f16:
281 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
282 ; ZVFH-NEXT: vfmadd.vf v8, fa0, v9, v0.t
285 ; ZVFHMIN-LABEL: vfma_vf_v8f16:
287 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
288 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
289 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
290 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
291 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
292 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
293 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
294 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
295 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
296 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
297 ; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v10, v0.t
298 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
299 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; Splat %b from lane 0 into every lane of %vb.
301 %elt.head = insertelement <8 x half> poison, half %b, i32 0
302 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
303 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl)
; vp.fma on <8 x half> with a splatted scalar second operand and an
; all-true mask. With Zvfh the expected code is an unmasked vfmadd.vf; with
; only Zvfhmin the scalar is expected to be materialised as a half vector
; and the operation performed at e32 by an unmasked vfmadd.vv.
307 define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %vc, i32 zeroext %evl) {
308 ; ZVFH-LABEL: vfma_vf_v8f16_unmasked:
310 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
311 ; ZVFH-NEXT: vfmadd.vf v8, fa0, v9
314 ; ZVFHMIN-LABEL: vfma_vf_v8f16_unmasked:
316 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
317 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
318 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
319 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
320 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v10
321 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
322 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
323 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
324 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
325 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma
326 ; ZVFHMIN-NEXT: vfmadd.vv v16, v14, v10
327 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
328 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16
; Splat %b from lane 0 into every lane of %vb.
330 %elt.head = insertelement <8 x half> poison, half %b, i32 0
331 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
; Build the all-true mask by splatting i1 true from lane 0.
333 %head = insertelement <8 x i1> poison, i1 true, i32 0
334 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
335 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl)
338 declare <16 x half> @llvm.vp.fma.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32)
; Masked vp.fma on <16 x half> with three vector operands. With Zvfh the
; expected code is a single masked vfmadd.vv at e16/m2 plus a copy of the
; result into v8. With only Zvfhmin the operands are expected to be widened
; to e32/m4, combined by a masked vfmadd.vv, and narrowed back to e16.
340 define <16 x half> @vfma_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
341 ; ZVFH-LABEL: vfma_vv_v16f16:
343 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
344 ; ZVFH-NEXT: vfmadd.vv v10, v8, v12, v0.t
345 ; ZVFH-NEXT: vmv.v.v v8, v10
348 ; ZVFHMIN-LABEL: vfma_vv_v16f16:
350 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
351 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
352 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
353 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
354 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
355 ; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t
356 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
357 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
359 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl)
; vp.fma on <16 x half> with three vector operands and an all-true mask.
; The expected vfmadd.vv has no v0.t mask operand in either the direct
; Zvfh lowering or the widen/narrow Zvfhmin lowering.
363 define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16 x half> %c, i32 zeroext %evl) {
364 ; ZVFH-LABEL: vfma_vv_v16f16_unmasked:
366 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
367 ; ZVFH-NEXT: vfmadd.vv v8, v10, v12
370 ; ZVFHMIN-LABEL: vfma_vv_v16f16_unmasked:
372 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
373 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
374 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
375 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
376 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
377 ; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16
378 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
379 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; Build the all-true mask by splatting i1 true from lane 0.
381 %head = insertelement <16 x i1> poison, i1 true, i32 0
382 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
383 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl)
; Masked vp.fma on <16 x half> whose second operand is a splat of the
; scalar %b. With Zvfh the splat is expected to fold into vfmadd.vf. With
; only Zvfhmin the scalar is expected to be converted to float, splatted
; and narrowed to a half vector before the widen / masked vfmadd.vv /
; narrow sequence.
387 define <16 x half> @vfma_vf_v16f16(<16 x half> %va, half %b, <16 x half> %vc, <16 x i1> %m, i32 zeroext %evl) {
388 ; ZVFH-LABEL: vfma_vf_v16f16:
390 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
391 ; ZVFH-NEXT: vfmadd.vf v8, fa0, v10, v0.t
394 ; ZVFHMIN-LABEL: vfma_vf_v16f16:
396 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
397 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
398 ; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
399 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
400 ; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
401 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
402 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
403 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
404 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
405 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
406 ; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v12, v0.t
407 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
408 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; Splat %b from lane 0 into every lane of %vb.
410 %elt.head = insertelement <16 x half> poison, half %b, i32 0
411 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
412 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl)
; vp.fma on <16 x half> with a splatted scalar second operand and an
; all-true mask. With Zvfh the expected code is an unmasked vfmadd.vf; with
; only Zvfhmin the scalar is expected to be materialised as a half vector
; and the operation performed at e32 by an unmasked vfmadd.vv.
416 define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half> %vc, i32 zeroext %evl) {
417 ; ZVFH-LABEL: vfma_vf_v16f16_unmasked:
419 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
420 ; ZVFH-NEXT: vfmadd.vf v8, fa0, v10
423 ; ZVFHMIN-LABEL: vfma_vf_v16f16_unmasked:
425 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
426 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
427 ; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
428 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
429 ; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v12
430 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
431 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
432 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
433 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
434 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma
435 ; ZVFHMIN-NEXT: vfmadd.vv v24, v20, v12
436 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
437 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
; Splat %b from lane 0 into every lane of %vb.
439 %elt.head = insertelement <16 x half> poison, half %b, i32 0
440 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
; Build the all-true mask by splatting i1 true from lane 0.
442 %head = insertelement <16 x i1> poison, i1 true, i32 0
443 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
444 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl)
447 declare <2 x float> @llvm.vp.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32)
; Masked vp.fma on <2 x float> with three vector operands. Every run
; configuration shares one expected sequence: a masked vfmadd.vv at e32/mf2
; followed by a copy of the result into v8.
449 define <2 x float> @vfma_vv_v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
450 ; CHECK-LABEL: vfma_vv_v2f32:
452 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
453 ; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
454 ; CHECK-NEXT: vmv1r.v v8, v9
456 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl)
; vp.fma on <2 x float> with three vector operands and an all-true mask;
; the expected code is a single vfmadd.vv at e32/mf2 with no v0.t operand.
460 define <2 x float> @vfma_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, <2 x float> %c, i32 zeroext %evl) {
461 ; CHECK-LABEL: vfma_vv_v2f32_unmasked:
463 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
464 ; CHECK-NEXT: vfmadd.vv v8, v9, v10
; Build the all-true mask by splatting i1 true from lane 0.
466 %head = insertelement <2 x i1> poison, i1 true, i32 0
467 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
468 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl)
; Masked vp.fma on <2 x float> whose second operand is a splat of the
; scalar %b; the splat is expected to fold into a masked vfmadd.vf that
; reads the scalar from fa0.
472 define <2 x float> @vfma_vf_v2f32(<2 x float> %va, float %b, <2 x float> %vc, <2 x i1> %m, i32 zeroext %evl) {
473 ; CHECK-LABEL: vfma_vf_v2f32:
475 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
476 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; Splat %b from lane 0 into every lane of %vb.
478 %elt.head = insertelement <2 x float> poison, float %b, i32 0
479 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
480 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl)
; vp.fma on <2 x float> with a splatted scalar second operand and an
; all-true mask; the expected code is an unmasked vfmadd.vf at e32/mf2.
484 define <2 x float> @vfma_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x float> %vc, i32 zeroext %evl) {
485 ; CHECK-LABEL: vfma_vf_v2f32_unmasked:
487 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
488 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; Splat %b from lane 0 into every lane of %vb.
490 %elt.head = insertelement <2 x float> poison, float %b, i32 0
491 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
; Build the all-true mask by splatting i1 true from lane 0.
493 %head = insertelement <2 x i1> poison, i1 true, i32 0
494 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
495 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl)
498 declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)
; Masked vp.fma on <4 x float> with three vector operands. Every run
; configuration shares one expected sequence: a masked vfmadd.vv at e32/m1
; followed by a copy of the result into v8.
500 define <4 x float> @vfma_vv_v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
501 ; CHECK-LABEL: vfma_vv_v4f32:
503 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
504 ; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
505 ; CHECK-NEXT: vmv.v.v v8, v9
507 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl)
; vp.fma on <4 x float> with three vector operands and an all-true mask;
; the expected code is a single vfmadd.vv at e32/m1 with no v0.t operand.
511 define <4 x float> @vfma_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, <4 x float> %c, i32 zeroext %evl) {
512 ; CHECK-LABEL: vfma_vv_v4f32_unmasked:
514 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
515 ; CHECK-NEXT: vfmadd.vv v8, v9, v10
; Build the all-true mask by splatting i1 true from lane 0.
517 %head = insertelement <4 x i1> poison, i1 true, i32 0
518 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
519 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl)
; Masked vp.fma on <4 x float> whose second operand is a splat of the
; scalar %b; the splat is expected to fold into a masked vfmadd.vf that
; reads the scalar from fa0.
523 define <4 x float> @vfma_vf_v4f32(<4 x float> %va, float %b, <4 x float> %vc, <4 x i1> %m, i32 zeroext %evl) {
524 ; CHECK-LABEL: vfma_vf_v4f32:
526 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
527 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; Splat %b from lane 0 into every lane of %vb.
529 %elt.head = insertelement <4 x float> poison, float %b, i32 0
530 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
531 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl)
; vp.fma on <4 x float> with a splatted scalar second operand and an
; all-true mask; the expected code is an unmasked vfmadd.vf at e32/m1.
535 define <4 x float> @vfma_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x float> %vc, i32 zeroext %evl) {
536 ; CHECK-LABEL: vfma_vf_v4f32_unmasked:
538 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
539 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; Splat %b from lane 0 into every lane of %vb.
541 %elt.head = insertelement <4 x float> poison, float %b, i32 0
542 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
; Build the all-true mask by splatting i1 true from lane 0.
544 %head = insertelement <4 x i1> poison, i1 true, i32 0
545 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
546 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl)
549 declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32)
; Masked vp.fma on <8 x float> with three vector operands. Every run
; configuration shares one expected sequence: a masked vfmadd.vv at e32/m2
; followed by a copy of the result into v8.
551 define <8 x float> @vfma_vv_v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
552 ; CHECK-LABEL: vfma_vv_v8f32:
554 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
555 ; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t
556 ; CHECK-NEXT: vmv.v.v v8, v10
558 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl)
; vp.fma on <8 x float> with three vector operands and an all-true mask;
; the expected code is a single vfmadd.vv at e32/m2 with no v0.t operand.
562 define <8 x float> @vfma_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, <8 x float> %c, i32 zeroext %evl) {
563 ; CHECK-LABEL: vfma_vv_v8f32_unmasked:
565 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
566 ; CHECK-NEXT: vfmadd.vv v8, v10, v12
; Build the all-true mask by splatting i1 true from lane 0.
568 %head = insertelement <8 x i1> poison, i1 true, i32 0
569 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
570 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl)
; Masked vp.fma on <8 x float> whose second operand is a splat of the
; scalar %b; the splat is expected to fold into a masked vfmadd.vf that
; reads the scalar from fa0.
574 define <8 x float> @vfma_vf_v8f32(<8 x float> %va, float %b, <8 x float> %vc, <8 x i1> %m, i32 zeroext %evl) {
575 ; CHECK-LABEL: vfma_vf_v8f32:
577 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
578 ; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t
; Splat %b from lane 0 into every lane of %vb.
580 %elt.head = insertelement <8 x float> poison, float %b, i32 0
581 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
582 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl)
; vp.fma on <8 x float> with a splatted scalar second operand and an
; all-true mask; the expected code is an unmasked vfmadd.vf at e32/m2.
586 define <8 x float> @vfma_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x float> %vc, i32 zeroext %evl) {
587 ; CHECK-LABEL: vfma_vf_v8f32_unmasked:
589 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
590 ; CHECK-NEXT: vfmadd.vf v8, fa0, v10
; Splat %b from lane 0 into every lane of %vb.
592 %elt.head = insertelement <8 x float> poison, float %b, i32 0
593 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
; Build the all-true mask by splatting i1 true from lane 0.
595 %head = insertelement <8 x i1> poison, i1 true, i32 0
596 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
597 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl)
600 declare <16 x float> @llvm.vp.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32)
; Masked vp.fma on <16 x float> with three vector operands. Every run
; configuration shares one expected sequence: a masked vfmadd.vv at e32/m4
; followed by a copy of the result into v8.
602 define <16 x float> @vfma_vv_v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
603 ; CHECK-LABEL: vfma_vv_v16f32:
605 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
606 ; CHECK-NEXT: vfmadd.vv v12, v8, v16, v0.t
607 ; CHECK-NEXT: vmv.v.v v8, v12
609 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl)
; vp.fma on <16 x float> with three vector operands and an all-true mask;
; the expected code is a single vfmadd.vv at e32/m4 with no v0.t operand.
613 define <16 x float> @vfma_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b, <16 x float> %c, i32 zeroext %evl) {
614 ; CHECK-LABEL: vfma_vv_v16f32_unmasked:
616 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
617 ; CHECK-NEXT: vfmadd.vv v8, v12, v16
; Build the all-true mask by splatting i1 true from lane 0.
619 %head = insertelement <16 x i1> poison, i1 true, i32 0
620 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
621 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl)
; Masked vp.fma on <16 x float> whose second operand is a splat of the
; scalar %b; the splat is expected to fold into a masked vfmadd.vf that
; reads the scalar from fa0.
625 define <16 x float> @vfma_vf_v16f32(<16 x float> %va, float %b, <16 x float> %vc, <16 x i1> %m, i32 zeroext %evl) {
626 ; CHECK-LABEL: vfma_vf_v16f32:
628 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
629 ; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t
; Splat %b from lane 0 into every lane of %vb.
631 %elt.head = insertelement <16 x float> poison, float %b, i32 0
632 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
633 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl)
; vp.fma on <16 x float> with a splatted scalar second operand and an
; all-true mask; the expected code is an unmasked vfmadd.vf at e32/m4.
637 define <16 x float> @vfma_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x float> %vc, i32 zeroext %evl) {
638 ; CHECK-LABEL: vfma_vf_v16f32_unmasked:
640 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
641 ; CHECK-NEXT: vfmadd.vf v8, fa0, v12
; Splat %b from lane 0 into every lane of %vb.
643 %elt.head = insertelement <16 x float> poison, float %b, i32 0
644 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
; Build the all-true mask by splatting i1 true from lane 0.
646 %head = insertelement <16 x i1> poison, i1 true, i32 0
647 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
648 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl)
651 declare <2 x double> @llvm.vp.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32)
; Masked vp.fma on <2 x double> with three vector operands. Every run
; configuration shares one expected sequence: a masked vfmadd.vv at e64/m1
; followed by a copy of the result into v8.
653 define <2 x double> @vfma_vv_v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
654 ; CHECK-LABEL: vfma_vv_v2f64:
656 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
657 ; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
658 ; CHECK-NEXT: vmv.v.v v8, v9
660 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl)
; vp.fma on <2 x double> with three vector operands and an all-true mask;
; the expected code is a single vfmadd.vv at e64/m1 with no v0.t operand.
664 define <2 x double> @vfma_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b, <2 x double> %c, i32 zeroext %evl) {
665 ; CHECK-LABEL: vfma_vv_v2f64_unmasked:
667 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
668 ; CHECK-NEXT: vfmadd.vv v8, v9, v10
; Build the all-true mask by splatting i1 true from lane 0.
670 %head = insertelement <2 x i1> poison, i1 true, i32 0
671 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
672 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl)
; Masked vp.fma on <2 x double> whose second operand is a splat of the
; scalar %b; the splat is expected to fold into a masked vfmadd.vf that
; reads the scalar from fa0.
676 define <2 x double> @vfma_vf_v2f64(<2 x double> %va, double %b, <2 x double> %vc, <2 x i1> %m, i32 zeroext %evl) {
677 ; CHECK-LABEL: vfma_vf_v2f64:
679 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
680 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; Splat %b from lane 0 into every lane of %vb.
682 %elt.head = insertelement <2 x double> poison, double %b, i32 0
683 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
684 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl)
; vp.fma on <2 x double> with a splatted scalar second operand and an
; all-true mask; the expected code is an unmasked vfmadd.vf at e64/m1.
688 define <2 x double> @vfma_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x double> %vc, i32 zeroext %evl) {
689 ; CHECK-LABEL: vfma_vf_v2f64_unmasked:
691 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
692 ; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; Splat %b from lane 0 into every lane of %vb.
694 %elt.head = insertelement <2 x double> poison, double %b, i32 0
695 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
; Build the all-true mask by splatting i1 true from lane 0.
697 %head = insertelement <2 x i1> poison, i1 true, i32 0
698 %m = shufflevector <2 x i1> %head, <2 x i1> poison, <2 x i32> zeroinitializer
699 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl)
702 declare <4 x double> @llvm.vp.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32)
; Masked vp.fma on <4 x double> with three vector operands. Every run
; configuration shares one expected sequence: a masked vfmadd.vv at e64/m2
; followed by a copy of the result into v8.
704 define <4 x double> @vfma_vv_v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
705 ; CHECK-LABEL: vfma_vv_v4f64:
707 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
708 ; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t
709 ; CHECK-NEXT: vmv.v.v v8, v10
711 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl)
; vp.fma on <4 x double> with three vector operands and an all-true mask;
; the expected code is a single vfmadd.vv at e64/m2 with no v0.t operand.
715 define <4 x double> @vfma_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, <4 x double> %c, i32 zeroext %evl) {
716 ; CHECK-LABEL: vfma_vv_v4f64_unmasked:
718 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
719 ; CHECK-NEXT: vfmadd.vv v8, v10, v12
; Build the all-true mask by splatting i1 true from lane 0.
721 %head = insertelement <4 x i1> poison, i1 true, i32 0
722 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
723 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl)
; Masked vp.fma on <4 x double> whose second operand is a splat of the
; scalar %b; the splat is expected to fold into a masked vfmadd.vf that
; reads the scalar from fa0.
727 define <4 x double> @vfma_vf_v4f64(<4 x double> %va, double %b, <4 x double> %vc, <4 x i1> %m, i32 zeroext %evl) {
728 ; CHECK-LABEL: vfma_vf_v4f64:
730 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
731 ; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t
; Splat %b from lane 0 into every lane of %vb.
733 %elt.head = insertelement <4 x double> poison, double %b, i32 0
734 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
735 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl)
; vp.fma on <4 x double> with a splatted scalar second operand and an
; all-true mask; the expected code is an unmasked vfmadd.vf at e64/m2.
739 define <4 x double> @vfma_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x double> %vc, i32 zeroext %evl) {
740 ; CHECK-LABEL: vfma_vf_v4f64_unmasked:
742 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
743 ; CHECK-NEXT: vfmadd.vf v8, fa0, v10
; Splat %b from lane 0 into every lane of %vb.
745 %elt.head = insertelement <4 x double> poison, double %b, i32 0
746 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
; Build the all-true mask by splatting i1 true from lane 0.
748 %head = insertelement <4 x i1> poison, i1 true, i32 0
749 %m = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
750 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl)
753 declare <8 x double> @llvm.vp.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32)
; Masked vp.fma on <8 x double> with three vector operands. Every run
; configuration shares one expected sequence: a masked vfmadd.vv at e64/m4
; followed by a copy of the result into v8.
755 define <8 x double> @vfma_vv_v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
756 ; CHECK-LABEL: vfma_vv_v8f64:
758 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
759 ; CHECK-NEXT: vfmadd.vv v12, v8, v16, v0.t
760 ; CHECK-NEXT: vmv.v.v v8, v12
762 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl)
; vp.fma on <8 x double> with three vector operands and an all-true mask;
; the expected code is a single vfmadd.vv at e64/m4 with no v0.t operand.
766 define <8 x double> @vfma_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, <8 x double> %c, i32 zeroext %evl) {
767 ; CHECK-LABEL: vfma_vv_v8f64_unmasked:
769 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
770 ; CHECK-NEXT: vfmadd.vv v8, v12, v16
; Build the all-true mask by splatting i1 true from lane 0.
772 %head = insertelement <8 x i1> poison, i1 true, i32 0
773 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
774 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl)
; Masked vector-scalar vp.fma on <8 x double>. The scalar %b is splatted into
; %vb with insertelement + shufflevector. The expected output uses the scalar
; form directly: one vfmadd.vf taking fa0, masked by v0.t, at e64/m4 (AVL from
; a0), with no separate splat instruction.
778 define <8 x double> @vfma_vf_v8f64(<8 x double> %va, double %b, <8 x double> %vc, <8 x i1> %m, i32 zeroext %evl) {
779 ; CHECK-LABEL: vfma_vf_v8f64:
781 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
782 ; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t
784 %elt.head = insertelement <8 x double> poison, double %b, i32 0
785 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
786 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl)
; Unmasked vector-scalar vp.fma on <8 x double>. Both the scalar splat %vb and
; the all-true mask %m are built with insertelement + shufflevector. The
; expected output is a single vfmadd.vf taking fa0 with no v0.t operand at
; e64/m4 (AVL from a0).
790 define <8 x double> @vfma_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x double> %vc, i32 zeroext %evl) {
791 ; CHECK-LABEL: vfma_vf_v8f64_unmasked:
793 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
794 ; CHECK-NEXT: vfmadd.vf v8, fa0, v12
796 %elt.head = insertelement <8 x double> poison, double %b, i32 0
797 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
798 %head = insertelement <8 x i1> poison, i1 true, i32 0
799 %m = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
800 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl)
; Vector-predicated fused multiply-add intrinsic for the non-power-of-two type
; <15 x double>: three vector operands, a <15 x i1> mask, and a trailing i32
; operand (the tests pass %evl).
804 declare <15 x double> @llvm.vp.fma.v15f64(<15 x double>, <15 x double>, <15 x double>, <15 x i1>, i32)
; Masked vector-vector vp.fma on <15 x double>. The expected output first loads
; one operand from the address in a0 with vle64.v under a fixed VL of 16, then
; switches to the AVL in a1 for a masked vfmadd.vv at e64/m8 and copies the
; result from v16 into v8.
; NOTE(review) - the loaded operand is presumably %c passed indirectly because
; it does not fit in argument registers; confirm against the calling convention.
806 define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) {
807 ; CHECK-LABEL: vfma_vv_v15f64:
809 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
810 ; CHECK-NEXT: vle64.v v24, (a0)
811 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
812 ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
813 ; CHECK-NEXT: vmv.v.v v8, v16
815 %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl)
; Unmasked vector-vector vp.fma on <15 x double>: the mask is an all-true splat
; built with insertelement + shufflevector. The expected output loads one
; operand from the address in a0 under a fixed VL of 16, then issues a single
; vfmadd.vv without v0.t at e64/m8 using the AVL in a1, writing v8 directly.
819 define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) {
820 ; CHECK-LABEL: vfma_vv_v15f64_unmasked:
822 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
823 ; CHECK-NEXT: vle64.v v24, (a0)
824 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
825 ; CHECK-NEXT: vfmadd.vv v8, v16, v24
827 %head = insertelement <15 x i1> poison, i1 true, i32 0
828 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
829 %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl)
; Vector-predicated fused multiply-add intrinsic for <16 x double>: three vector
; operands, a <16 x i1> mask, and a trailing i32 operand (the tests pass %evl).
833 declare <16 x double> @llvm.vp.fma.v16f64(<16 x double>, <16 x double>, <16 x double>, <16 x i1>, i32)
; Masked vector-vector vp.fma on <16 x double>. The expected output loads one
; operand from the address in a0 with vle64.v under a fixed VL of 16, then uses
; the AVL in a1 for a masked vfmadd.vv at e64/m8 and copies v16 into v8.
; NOTE(review) - the loaded operand is presumably %c passed indirectly; confirm
; against the calling convention.
835 define <16 x double> @vfma_vv_v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 zeroext %evl) {
836 ; CHECK-LABEL: vfma_vv_v16f64:
838 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
839 ; CHECK-NEXT: vle64.v v24, (a0)
840 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
841 ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
842 ; CHECK-NEXT: vmv.v.v v8, v16
844 %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl)
; Unmasked vector-vector vp.fma on <16 x double>: the mask is an all-true splat
; built with insertelement + shufflevector. The expected output loads one
; operand from the address in a0 under a fixed VL of 16, then issues a single
; vfmadd.vv without v0.t at e64/m8 using the AVL in a1, writing v8 directly.
848 define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %b, <16 x double> %c, i32 zeroext %evl) {
849 ; CHECK-LABEL: vfma_vv_v16f64_unmasked:
851 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
852 ; CHECK-NEXT: vle64.v v24, (a0)
853 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
854 ; CHECK-NEXT: vfmadd.vv v8, v16, v24
856 %head = insertelement <16 x i1> poison, i1 true, i32 0
857 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
858 %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl)
; Masked vector-scalar vp.fma on <16 x double>. The scalar %b is splatted into
; %vb with insertelement + shufflevector. The expected output is one vfmadd.vf
; taking fa0, masked by v0.t, at e64/m8 with the AVL in a0; unlike the
; vector-vector <16 x double> tests, no vle64.v load is expected here.
862 define <16 x double> @vfma_vf_v16f64(<16 x double> %va, double %b, <16 x double> %vc, <16 x i1> %m, i32 zeroext %evl) {
863 ; CHECK-LABEL: vfma_vf_v16f64:
865 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
866 ; CHECK-NEXT: vfmadd.vf v8, fa0, v16, v0.t
868 %elt.head = insertelement <16 x double> poison, double %b, i32 0
869 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
870 %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl)
; Unmasked vector-scalar vp.fma on <16 x double>. Both the scalar splat %vb and
; the all-true mask %m are built with insertelement + shufflevector. The
; expected output is a single vfmadd.vf taking fa0 with no v0.t operand at
; e64/m8 (AVL from a0).
874 define <16 x double> @vfma_vf_v16f64_unmasked(<16 x double> %va, double %b, <16 x double> %vc, i32 zeroext %evl) {
875 ; CHECK-LABEL: vfma_vf_v16f64_unmasked:
877 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
878 ; CHECK-NEXT: vfmadd.vf v8, fa0, v16
880 %elt.head = insertelement <16 x double> poison, double %b, i32 0
881 %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
882 %head = insertelement <16 x i1> poison, i1 true, i32 0
883 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
884 %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl)
; Vector-predicated fused multiply-add intrinsic for <32 x double>: three vector
; operands, a <32 x i1> mask, and a trailing i32 operand (the tests pass %evl).
888 declare <32 x double> @llvm.vp.fma.v32f64(<32 x double>, <32 x double>, <32 x double>, <32 x i1>, i32)
; Masked vector-vector vp.fma on <32 x double>. The expected output performs
; the operation as two masked vfmadd.vv instructions at e64/m8:
;   - the first runs with VL set from a4 clamped to at most 16 (the bltu plus
;     "li a0, 16" sequence) and the incoming mask in v0;
;   - the second runs with VL set from a4 - 16, forced to 0 when that
;     subtraction wraps (the sltu, "addi a1, a1, -1", and sequence), after
;     v0 is replaced by v1, which holds v0 slid down by two e8 elements.
; Operands are loaded with vle64.v from a0, a0 + 128, a2 and a2 + 128 under a
; fixed VL of 16, and a stack area of 32 * vlenb bytes is reserved to spill and
; reload m8 register groups around the two operations.
890 define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 zeroext %evl) {
891 ; CHECK-LABEL: vfma_vv_v32f64:
893 ; CHECK-NEXT: addi sp, sp, -16
894 ; CHECK-NEXT: .cfi_def_cfa_offset 16
895 ; CHECK-NEXT: csrr a1, vlenb
896 ; CHECK-NEXT: slli a1, a1, 5
897 ; CHECK-NEXT: sub sp, sp, a1
898 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
899 ; CHECK-NEXT: addi a1, a2, 128
900 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
901 ; CHECK-NEXT: vle64.v v24, (a1)
902 ; CHECK-NEXT: csrr a1, vlenb
903 ; CHECK-NEXT: li a3, 24
904 ; CHECK-NEXT: mul a1, a1, a3
905 ; CHECK-NEXT: add a1, sp, a1
906 ; CHECK-NEXT: addi a1, a1, 16
907 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
908 ; CHECK-NEXT: addi a1, a0, 128
909 ; CHECK-NEXT: vle64.v v24, (a1)
910 ; CHECK-NEXT: csrr a1, vlenb
911 ; CHECK-NEXT: slli a1, a1, 4
912 ; CHECK-NEXT: add a1, sp, a1
913 ; CHECK-NEXT: addi a1, a1, 16
914 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
915 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
916 ; CHECK-NEXT: vslidedown.vi v1, v0, 2
917 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
918 ; CHECK-NEXT: vle64.v v24, (a2)
919 ; CHECK-NEXT: addi a1, sp, 16
920 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
921 ; CHECK-NEXT: vle64.v v24, (a0)
922 ; CHECK-NEXT: li a1, 16
923 ; CHECK-NEXT: csrr a0, vlenb
924 ; CHECK-NEXT: slli a0, a0, 3
925 ; CHECK-NEXT: add a0, sp, a0
926 ; CHECK-NEXT: addi a0, a0, 16
927 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
928 ; CHECK-NEXT: mv a0, a4
929 ; CHECK-NEXT: bltu a4, a1, .LBB50_2
930 ; CHECK-NEXT: # %bb.1:
931 ; CHECK-NEXT: li a0, 16
932 ; CHECK-NEXT: .LBB50_2:
933 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
934 ; CHECK-NEXT: addi a0, sp, 16
935 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
936 ; CHECK-NEXT: vfmadd.vv v24, v8, v16, v0.t
937 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
938 ; CHECK-NEXT: addi a0, a4, -16
939 ; CHECK-NEXT: sltu a1, a4, a0
940 ; CHECK-NEXT: addi a1, a1, -1
941 ; CHECK-NEXT: and a0, a1, a0
942 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
943 ; CHECK-NEXT: vmv1r.v v0, v1
944 ; CHECK-NEXT: csrr a0, vlenb
945 ; CHECK-NEXT: li a1, 24
946 ; CHECK-NEXT: mul a0, a0, a1
947 ; CHECK-NEXT: add a0, sp, a0
948 ; CHECK-NEXT: addi a0, a0, 16
949 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
950 ; CHECK-NEXT: csrr a0, vlenb
951 ; CHECK-NEXT: slli a0, a0, 4
952 ; CHECK-NEXT: add a0, sp, a0
953 ; CHECK-NEXT: addi a0, a0, 16
954 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
955 ; CHECK-NEXT: csrr a0, vlenb
956 ; CHECK-NEXT: slli a0, a0, 3
957 ; CHECK-NEXT: add a0, sp, a0
958 ; CHECK-NEXT: addi a0, a0, 16
959 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
960 ; CHECK-NEXT: vfmadd.vv v16, v24, v8, v0.t
961 ; CHECK-NEXT: addi a0, sp, 16
962 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
963 ; CHECK-NEXT: csrr a0, vlenb
964 ; CHECK-NEXT: slli a0, a0, 5
965 ; CHECK-NEXT: add sp, sp, a0
966 ; CHECK-NEXT: addi sp, sp, 16
968 %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
972 define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %b, <32 x double> %c, i32 zeroext %evl) {
973 ; CHECK-LABEL: vfma_vv_v32f64_unmasked:
975 ; CHECK-NEXT: addi sp, sp, -16
976 ; CHECK-NEXT: .cfi_def_cfa_offset 16
977 ; CHECK-NEXT: csrr a1, vlenb
978 ; CHECK-NEXT: li a3, 24
979 ; CHECK-NEXT: mul a1, a1, a3
980 ; CHECK-NEXT: sub sp, sp, a1
981 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
982 ; CHECK-NEXT: addi a1, a2, 128
983 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
984 ; CHECK-NEXT: vle64.v v24, (a1)
985 ; CHECK-NEXT: csrr a1, vlenb
986 ; CHECK-NEXT: slli a1, a1, 4
987 ; CHECK-NEXT: add a1, sp, a1
988 ; CHECK-NEXT: addi a1, a1, 16
989 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
990 ; CHECK-NEXT: addi a1, a0, 128
991 ; CHECK-NEXT: vle64.v v24, (a1)
992 ; CHECK-NEXT: addi a1, sp, 16
993 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
994 ; CHECK-NEXT: vle64.v v24, (a2)
995 ; CHECK-NEXT: csrr a1, vlenb
996 ; CHECK-NEXT: slli a1, a1, 3
997 ; CHECK-NEXT: add a1, sp, a1
998 ; CHECK-NEXT: addi a1, a1, 16
999 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill
1000 ; CHECK-NEXT: vle64.v v0, (a0)
1001 ; CHECK-NEXT: li a1, 16
1002 ; CHECK-NEXT: mv a0, a4
1003 ; CHECK-NEXT: bltu a4, a1, .LBB51_2
1004 ; CHECK-NEXT: # %bb.1:
1005 ; CHECK-NEXT: li a0, 16
1006 ; CHECK-NEXT: .LBB51_2:
1007 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1008 ; CHECK-NEXT: csrr a0, vlenb
1009 ; CHECK-NEXT: slli a0, a0, 3
1010 ; CHECK-NEXT: add a0, sp, a0
1011 ; CHECK-NEXT: addi a0, a0, 16
1012 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
1013 ; CHECK-NEXT: vfmadd.vv v0, v8, v24
1014 ; CHECK-NEXT: addi a0, a4, -16
1015 ; CHECK-NEXT: sltu a1, a4, a0
1016 ; CHECK-NEXT: addi a1, a1, -1
1017 ; CHECK-NEXT: and a0, a1, a0
1018 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
1019 ; CHECK-NEXT: csrr a0, vlenb
1020 ; CHECK-NEXT: slli a0, a0, 4
1021 ; CHECK-NEXT: add a0, sp, a0
1022 ; CHECK-NEXT: addi a0, a0, 16
1023 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1024 ; CHECK-NEXT: addi a0, sp, 16
1025 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
1026 ; CHECK-NEXT: vfmadd.vv v24, v16, v8
1027 ; CHECK-NEXT: vmv8r.v v8, v0
1028 ; CHECK-NEXT: vmv.v.v v16, v24
1029 ; CHECK-NEXT: csrr a0, vlenb
1030 ; CHECK-NEXT: li a1, 24
1031 ; CHECK-NEXT: mul a0, a0, a1
1032 ; CHECK-NEXT: add sp, sp, a0
1033 ; CHECK-NEXT: addi sp, sp, 16
1035 %head = insertelement <32 x i1> poison, i1 true, i32 0
1036 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
1037 %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
1038 ret <32 x double> %v