; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
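
; This file checks codegen for the @llvm.vp.fma.* intrinsics on fixed-length
; vectors. With +zvfh the f16 tests select vfmadd.vv/vfmadd.vf directly at
; e16. With +zvfhmin, which provides only f16 conversions, the operands are
; widened with vfwcvt.f.f.v, the FMA is performed at e32, and the result is
; narrowed back with vfncvt.f.f.w. The f32 and f64 tests are identical under
; both configurations and use the common CHECK prefix.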

declare <2 x half> @llvm.vp.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32)

define <2 x half> @vfma_vv_v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v2f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfmadd.vv v9, v8, v10, v0.t
; ZVFH-NEXT: vmv1r.v v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vv_v2f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %b, <2 x half> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v2f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfmadd.vv v8, v9, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vv_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %b, <2 x half> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}
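
; For the vfma_vf tests the scalar operand is splatted. ZVFH folds it into
; vfmadd.vf, while ZVFHMIN has no f16 vector-scalar ops, so the half value is
; moved to a GPR with fmv.x.h and broadcast with vmv.v.x before the usual
; widen/FMA/narrow sequence.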

define <2 x half> @vfma_vf_v2f16(<2 x half> %va, half %b, <2 x half> %vc, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v2f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vf_v2f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <2 x half> poison, half %b, i32 0
  %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x half> %v
}

define <2 x half> @vfma_vf_v2f16_unmasked(<2 x half> %va, half %b, <2 x half> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v2f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfmadd.vf v8, fa0, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vf_v2f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <2 x half> poison, half %b, i32 0
  %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
  %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %va, <2 x half> %vb, <2 x half> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %v
}

declare <4 x half> @llvm.vp.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32)

define <4 x half> @vfma_vv_v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v4f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfmadd.vv v9, v8, v10, v0.t
; ZVFH-NEXT: vmv1r.v v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vv_v4f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
  %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %b, <4 x half> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v4f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfmadd.vv v8, v9, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vv_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v12, v10, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
  %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %b, <4 x half> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vf_v4f16(<4 x half> %va, half %b, <4 x half> %vc, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v4f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vf_v4f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12, v0.t
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <4 x half> poison, half %b, i32 0
  %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x half> %v
}

define <4 x half> @vfma_vf_v4f16_unmasked(<4 x half> %va, half %b, <4 x half> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v4f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT: vfmadd.vf v8, fa0, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vf_v4f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v12, v11, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <4 x half> poison, half %b, i32 0
  %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
  %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %va, <4 x half> %vb, <4 x half> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.fma.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32)

define <8 x half> @vfma_vv_v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfmadd.vv v9, v8, v10, v0.t
; ZVFH-NEXT: vmv.v.v v8, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
; ZVFHMIN-NEXT: ret
  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %b, <8 x half> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v8f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfmadd.vv v8, v9, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vv_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %b, <8 x half> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vf_v8f16(<8 x half> %va, half %b, <8 x half> %vc, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14, v0.t
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <8 x half> poison, half %b, i32 0
  %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vfma_vf_v8f16_unmasked(<8 x half> %va, half %b, <8 x half> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v8f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT: vfmadd.vf v8, fa0, v9
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vf_v8f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v14, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <8 x half> poison, half %b, i32 0
  %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
  %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %va, <8 x half> %vb, <8 x half> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x half> %v
}

declare <16 x half> @llvm.vp.fma.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32)

define <16 x half> @vfma_vv_v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfmadd.vv v10, v8, v12, v0.t
; ZVFH-NEXT: vmv.v.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vv_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
; ZVFHMIN-NEXT: ret
  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %b, <16 x half> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vv_v16f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfmadd.vv v8, v10, v12
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vv_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %b, <16 x half> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vf_v16f16(<16 x half> %va, half %b, <16 x half> %vc, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v16f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfmadd.vf v8, fa0, v10, v0.t
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vf_v16f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10, v0.t
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20, v0.t
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <16 x half> poison, half %b, i32 0
  %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x half> %v
}

define <16 x half> @vfma_vf_v16f16_unmasked(<16 x half> %va, half %b, <16 x half> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfma_vf_v16f16_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT: vfmadd.vf v8, fa0, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfma_vf_v16f16_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: fmv.x.h a1, fa0
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v20, v16, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20
; ZVFHMIN-NEXT: ret
  %elt.head = insertelement <16 x half> poison, half %b, i32 0
  %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
  %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %va, <16 x half> %vb, <16 x half> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x half> %v
}

declare <2 x float> @llvm.vp.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32)

define <2 x float> @vfma_vv_v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %b, <2 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v9, v10
; CHECK-NEXT: ret
  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %b, <2 x float> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vf_v2f32(<2 x float> %va, float %b, <2 x float> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x float> poison, float %b, i32 0
  %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

define <2 x float> @vfma_vf_v2f32_unmasked(<2 x float> %va, float %b, <2 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x float> poison, float %b, i32 0
  %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
  %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %va, <2 x float> %vb, <2 x float> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)

define <4 x float> @vfma_vv_v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %b, <4 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v9, v10
; CHECK-NEXT: ret
  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %b, <4 x float> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vf_v4f32(<4 x float> %va, float %b, <4 x float> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x float> poison, float %b, i32 0
  %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vfma_vf_v4f32_unmasked(<4 x float> %va, float %b, <4 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x float> poison, float %b, i32 0
  %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
  %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %va, <4 x float> %vb, <4 x float> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32)

define <8 x float> @vfma_vv_v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %b, <8 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v10, v12
; CHECK-NEXT: ret
  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %b, <8 x float> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vf_v8f32(<8 x float> %va, float %b, <8 x float> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x float> poison, float %b, i32 0
  %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vfma_vf_v8f32_unmasked(<8 x float> %va, float %b, <8 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v10
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x float> poison, float %b, i32 0
  %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
  %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %va, <8 x float> %vb, <8 x float> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %v
}

declare <16 x float> @llvm.vp.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32)

define <16 x float> @vfma_vv_v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %b, <16 x float> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v12, v16
; CHECK-NEXT: ret
  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %b, <16 x float> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vf_v16f32(<16 x float> %va, float %b, <16 x float> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x float> poison, float %b, i32 0
  %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x float> %v
}

define <16 x float> @vfma_vf_v16f32_unmasked(<16 x float> %va, float %b, <16 x float> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f32_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v12
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x float> poison, float %b, i32 0
  %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
  %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x float> %v
}

declare <2 x double> @llvm.vp.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32)

define <2 x double> @vfma_vv_v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmadd.vv v9, v8, v10, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %b, <2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v9, v10
; CHECK-NEXT: ret
  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %b, <2 x double> %c, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vf_v2f64(<2 x double> %va, double %b, <2 x double> %vc, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x double> poison, double %b, i32 0
  %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

define <2 x double> @vfma_vf_v2f64_unmasked(<2 x double> %va, double %b, <2 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v2f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v9
; CHECK-NEXT: ret
  %elt.head = insertelement <2 x double> poison, double %b, i32 0
  %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
  %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %va, <2 x double> %vb, <2 x double> %vc, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32)

define <4 x double> @vfma_vv_v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfmadd.vv v10, v8, v12, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %b, <4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v10, v12
; CHECK-NEXT: ret
  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %b, <4 x double> %c, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vf_v4f64(<4 x double> %va, double %b, <4 x double> %vc, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x double> poison, double %b, i32 0
  %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vfma_vf_v4f64_unmasked(<4 x double> %va, double %b, <4 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v4f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v10
; CHECK-NEXT: ret
  %elt.head = insertelement <4 x double> poison, double %b, i32 0
  %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
  %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %va, <4 x double> %vb, <4 x double> %vc, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32)

define <8 x double> @vfma_vv_v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfmadd.vv v12, v8, v16, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
  %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %b, <8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v12, v16
; CHECK-NEXT: ret
  %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %b, <8 x double> %c, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vf_v8f64(<8 x double> %va, double %b, <8 x double> %vc, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x double> poison, double %b, i32 0
  %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vfma_vf_v8f64_unmasked(<8 x double> %va, double %b, <8 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v12
; CHECK-NEXT: ret
  %elt.head = insertelement <8 x double> poison, double %b, i32 0
  %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
  %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %va, <8 x double> %vb, <8 x double> %vc, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x double> %v
}
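
; For the v15f64 and v16f64 tests the first two operands arrive in v8 and v16;
; the third operand no longer fits in vector argument registers and is passed
; indirectly, so it is reloaded with vle64.v before the FMA.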

declare <15 x double> @llvm.vp.fma.v15f64(<15 x double>, <15 x double>, <15 x double>, <15 x i1>, i32)

define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
  %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl)
  ret <15 x double> %v
}

define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v15f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v16, v24
; CHECK-NEXT: ret
  %v = call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> splat (i1 true), i32 %evl)
  ret <15 x double> %v
}

declare <16 x double> @llvm.vp.fma.v16f64(<16 x double>, <16 x double>, <16 x double>, <16 x i1>, i32)

define <16 x double> @vfma_vv_v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %b, <16 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v16, v24
; CHECK-NEXT: ret
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vf_v16f64(<16 x double> %va, double %b, <16 x double> %vc, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v16, v0.t
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> %m, i32 %evl)
  ret <16 x double> %v
}

define <16 x double> @vfma_vf_v16f64_unmasked(<16 x double> %va, double %b, <16 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vf_v16f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vf v8, fa0, v16
; CHECK-NEXT: ret
  %elt.head = insertelement <16 x double> poison, double %b, i32 0
  %vb = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
  %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %vb, <16 x double> %vc, <16 x i1> splat (i1 true), i32 %evl)
  ret <16 x double> %v
}
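
; The <32 x double> tests are split into two <16 x double> halves: the EVL is
; clamped to 16 for the low half and reduced by 16 (clamped at zero) for the
; high half, in the masked test the mask is split with vslidedown.vi, and m8
; register groups are spilled to the stack around the two vfmadd.vv operations.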

declare <32 x double> @llvm.vp.fma.v32f64(<32 x double>, <32 x double>, <32 x double>, <32 x i1>, i32)

define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a2, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v24, (a2)
; CHECK-NEXT: addi a2, a0, 128
; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v8, (a2)
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a4
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
; CHECK-NEXT: bltu a4, a1, .LBB50_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB50_2:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, a4, -16
; CHECK-NEXT: sltu a1, a4, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: vmv1r.v v0, v7
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a2, 24
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t
; CHECK-NEXT: vmv.v.v v16, v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %b, <32 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_v32f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a2, 128
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v16, (a2)
; CHECK-NEXT: addi a2, a0, 128
; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vle64.v v24, (a2)
; CHECK-NEXT: vle64.v v0, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a4
; CHECK-NEXT: bltu a4, a1, .LBB51_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
; CHECK-NEXT: .LBB51_2:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v0, v8, v16
; CHECK-NEXT: addi a0, a4, -16
; CHECK-NEXT: sltu a1, a4, a0
; CHECK-NEXT: addi a1, a1, -1
; CHECK-NEXT: and a0, a1, a0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v24, v16, v8
; CHECK-NEXT: vmv8r.v v8, v0
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
  %v = call <32 x double> @llvm.vp.fma.v32f64(<32 x double> %va, <32 x double> %b, <32 x double> %c, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %v
}