1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
4 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
6 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
7 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
8 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
9 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
; VP intrinsics referenced by the <vscale x 1 x float> tests that follow:
; fma, fneg, fpext (half -> float), and merge (only the *_tu tests call merge).
11 declare <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
12 declare <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
13 declare <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
14 declare <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1>, <vscale x 1 x float>, <vscale x 1 x float>, i32)
; Masked vector-vector case, <vscale x 1 x float>:
;   vp.fma(fpext(%a), fpext(%b), fneg(%c)), every VP op using mask %m and
;   length %evl.
; With +zvfh the checks expect a single masked vfwmsac.vv; with +zvfhmin they
; expect two masked vfwcvt.f.f.v followed by a masked vfmsub.vv at e32.
16 define <vscale x 1 x float> @vmfsac_vv_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
17 ; ZVFH-LABEL: vmfsac_vv_nxv1f32:
19 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
20 ; ZVFH-NEXT: vfwmsac.vv v10, v8, v9, v0.t
21 ; ZVFH-NEXT: vmv1r.v v8, v10
24 ; ZVFHMIN-LABEL: vmfsac_vv_nxv1f32:
26 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
27 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
28 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t
29 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
30 ; ZVFHMIN-NEXT: vfmsub.vv v8, v11, v10, v0.t
32 %a.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
33 %b.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
34 %c.neg = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
35 %fma = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %a.f32, <vscale x 1 x float> %b.f32, <vscale x 1 x float> %c.neg, <vscale x 1 x i1> %m, i32 %evl)
36 ret <vscale x 1 x float> %fma
; Unmasked vector-vector case, <vscale x 1 x float>: same fpext/fneg/fma chain,
; but every VP op takes an all-true mask built with insertelement +
; shufflevector. The checks expect the instructions without a v0.t operand.
39 define <vscale x 1 x float> @vmfsac_vv_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
40 ; ZVFH-LABEL: vmfsac_vv_nxv1f32_unmasked:
42 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
43 ; ZVFH-NEXT: vfwmsac.vv v10, v8, v9
44 ; ZVFH-NEXT: vmv1r.v v8, v10
47 ; ZVFHMIN-LABEL: vmfsac_vv_nxv1f32_unmasked:
49 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
50 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
51 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
52 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
53 ; ZVFHMIN-NEXT: vfmsub.vv v8, v11, v10
55 %one.ins = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
56 %alltrue = shufflevector <vscale x 1 x i1> %one.ins, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
57 %a.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %alltrue, i32 %evl)
58 %b.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %alltrue, i32 %evl)
59 %c.neg = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> %alltrue, i32 %evl)
60 %fma = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %a.f32, <vscale x 1 x float> %b.f32, <vscale x 1 x float> %c.neg, <vscale x 1 x i1> %alltrue, i32 %evl)
61 ret <vscale x 1 x float> %fma
; Tail-undisturbed masked case, <vscale x 1 x float>: the fpext/fneg/fma chain
; runs under an all-true mask, then vp.merge selects between the fma result
; and the original %c using %m. The +zvfh checks expect a masked vfwmsac.vv
; under a "tu, mu" vsetvli; the +zvfhmin checks expect unmasked converts and a
; masked vfmsac.vv under "tu, mu".
64 define <vscale x 1 x float> @vmfsac_vv_nxv1f32_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
65 ; ZVFH-LABEL: vmfsac_vv_nxv1f32_tu:
67 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
68 ; ZVFH-NEXT: vfwmsac.vv v10, v8, v9, v0.t
69 ; ZVFH-NEXT: vmv1r.v v8, v10
72 ; ZVFHMIN-LABEL: vmfsac_vv_nxv1f32_tu:
74 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
75 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
76 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
77 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, mu
78 ; ZVFHMIN-NEXT: vfmsac.vv v10, v11, v8, v0.t
79 ; ZVFHMIN-NEXT: vmv1r.v v8, v10
81 %one.ins = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
82 %alltrue = shufflevector <vscale x 1 x i1> %one.ins, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
83 %a.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %alltrue, i32 %evl)
84 %b.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %alltrue, i32 %evl)
85 %c.neg = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> %alltrue, i32 %evl)
86 %fma = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %a.f32, <vscale x 1 x float> %b.f32, <vscale x 1 x float> %c.neg, <vscale x 1 x i1> %alltrue, i32 %evl)
87 %res = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %fma, <vscale x 1 x float> %c, i32 %evl)
88 ret <vscale x 1 x float> %res
; Tail-undisturbed unmasked case, <vscale x 1 x float>: as the _tu test, but the
; final vp.merge also takes the all-true mask. The checks expect unmasked
; vfwmsac.vv (+zvfh) or vfmsac.vv (+zvfhmin) under a "tu, ma" vsetvli.
91 define <vscale x 1 x float> @vmfsac_vv_nxv1f32_unmasked_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
92 ; ZVFH-LABEL: vmfsac_vv_nxv1f32_unmasked_tu:
94 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
95 ; ZVFH-NEXT: vfwmsac.vv v10, v8, v9
96 ; ZVFH-NEXT: vmv1r.v v8, v10
99 ; ZVFHMIN-LABEL: vmfsac_vv_nxv1f32_unmasked_tu:
101 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
102 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
103 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
104 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
105 ; ZVFHMIN-NEXT: vfmsac.vv v10, v11, v8
106 ; ZVFHMIN-NEXT: vmv1r.v v8, v10
108 %one.ins = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
109 %alltrue = shufflevector <vscale x 1 x i1> %one.ins, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
110 %a.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %alltrue, i32 %evl)
111 %b.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %alltrue, i32 %evl)
112 %c.neg = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> %alltrue, i32 %evl)
113 %fma = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %a.f32, <vscale x 1 x float> %b.f32, <vscale x 1 x float> %c.neg, <vscale x 1 x i1> %alltrue, i32 %evl)
114 %res = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %alltrue, <vscale x 1 x float> %fma, <vscale x 1 x float> %c, i32 %evl)
115 ret <vscale x 1 x float> %res
; Masked vector-scalar case, <vscale x 1 x float>: scalar half %b is splatted
; (insertelement + shufflevector), then vp.fma(fpext(%a), fpext(splat %b),
; fneg(%c)) runs with mask %m and length %evl on every VP op.
; With +zvfh the checks expect a masked vfwmsac.vf taking fa0 directly; with
; +zvfhmin they expect the splat to be materialized (fcvt.s.h, vfmv.v.f,
; vfncvt.f.f.w) before the masked widening converts and vfmsub.vv.
; No all-true mask is built in this function: all four VP calls use %m.
118 define <vscale x 1 x float> @vmfsac_vf_nxv1f32(<vscale x 1 x half> %a, half %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
119 ; ZVFH-LABEL: vmfsac_vf_nxv1f32:
121 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
122 ; ZVFH-NEXT: vfwmsac.vf v9, fa0, v8, v0.t
123 ; ZVFH-NEXT: vmv1r.v v8, v9
126 ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32:
128 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
129 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
130 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
131 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
132 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
133 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
134 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
135 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t
136 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
137 ; ZVFHMIN-NEXT: vfmsub.vv v8, v10, v9, v0.t
139 %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
140 %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
143 %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
144 %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
145 %negc = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
146 %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %negc, <vscale x 1 x i1> %m, i32 %evl)
147 ret <vscale x 1 x float> %v
; Commuted masked vector-scalar case, <vscale x 1 x float>: identical inputs to
; the vf test, but the extended splat of %b is the FIRST multiplicand of
; vp.fma and the extended %a the second. The +zvfh checks still expect a
; masked vfwmsac.vf with fa0.
150 define <vscale x 1 x float> @vmfsac_vf_nxv1f32_commute(<vscale x 1 x half> %a, half %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
151 ; ZVFH-LABEL: vmfsac_vf_nxv1f32_commute:
153 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
154 ; ZVFH-NEXT: vfwmsac.vf v9, fa0, v8, v0.t
155 ; ZVFH-NEXT: vmv1r.v v8, v9
158 ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32_commute:
160 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
161 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
162 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
163 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
164 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
165 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
166 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
167 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t
168 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
169 ; ZVFHMIN-NEXT: vfmsub.vv v10, v8, v9, v0.t
170 ; ZVFHMIN-NEXT: vmv1r.v v8, v10
172 %b.ins = insertelement <vscale x 1 x half> poison, half %b, i32 0
173 %b.splat = shufflevector <vscale x 1 x half> %b.ins, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
174 %a.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
175 %b.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b.splat, <vscale x 1 x i1> %m, i32 %evl)
176 %c.neg = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
177 %fma = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %b.f32, <vscale x 1 x float> %a.f32, <vscale x 1 x float> %c.neg, <vscale x 1 x i1> %m, i32 %evl)
178 ret <vscale x 1 x float> %fma
; Unmasked vector-scalar case, <vscale x 1 x float>: %b is splatted and all VP
; ops take an all-true mask built with insertelement + shufflevector. The
; checks expect vfwmsac.vf (+zvfh) or vfwcvt/vfmsub.vv (+zvfhmin) with no
; v0.t operand.
181 define <vscale x 1 x float> @vmfsac_vf_nxv1f32_unmasked(<vscale x 1 x half> %a, half %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
182 ; ZVFH-LABEL: vmfsac_vf_nxv1f32_unmasked:
184 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
185 ; ZVFH-NEXT: vfwmsac.vf v9, fa0, v8
186 ; ZVFH-NEXT: vmv1r.v v8, v9
189 ; ZVFHMIN-LABEL: vmfsac_vf_nxv1f32_unmasked:
191 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
192 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
193 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
194 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
195 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
196 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
197 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
198 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
199 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
200 ; ZVFHMIN-NEXT: vfmsub.vv v8, v10, v9
202 %b.ins = insertelement <vscale x 1 x half> poison, half %b, i32 0
203 %b.splat = shufflevector <vscale x 1 x half> %b.ins, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
204 %one.ins = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
205 %alltrue = shufflevector <vscale x 1 x i1> %one.ins, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
206 %a.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %alltrue, i32 %evl)
207 %b.f32 = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b.splat, <vscale x 1 x i1> %alltrue, i32 %evl)
208 %c.neg = call <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float> %c, <vscale x 1 x i1> %alltrue, i32 %evl)
209 %fma = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %a.f32, <vscale x 1 x float> %b.f32, <vscale x 1 x float> %c.neg, <vscale x 1 x i1> %alltrue, i32 %evl)
210 ret <vscale x 1 x float> %fma
; VP intrinsics referenced by the <vscale x 2 x float> tests that follow:
; fma, fneg, and fpext (half -> float).
213 declare <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
214 declare <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
215 declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
; Masked vector-vector case, <vscale x 2 x float>:
;   vp.fma(fpext(%a), fpext(%b), fneg(%c)) with mask %m and length %evl.
; The checks expect e16/mf2 for the narrow operands; +zvfh folds to a masked
; vfwmsac.vv, +zvfhmin converts first and uses a masked vfmsub.vv at e32/m1.
217 define <vscale x 2 x float> @vmfsac_vv_nxv2f32(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
218 ; ZVFH-LABEL: vmfsac_vv_nxv2f32:
220 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
221 ; ZVFH-NEXT: vfwmsac.vv v10, v8, v9, v0.t
222 ; ZVFH-NEXT: vmv1r.v v8, v10
225 ; ZVFHMIN-LABEL: vmfsac_vv_nxv2f32:
227 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
228 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8, v0.t
229 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9, v0.t
230 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
231 ; ZVFHMIN-NEXT: vfmsub.vv v8, v11, v10, v0.t
233 %a.f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
234 %b.f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
235 %c.neg = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
236 %fma = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %a.f32, <vscale x 2 x float> %b.f32, <vscale x 2 x float> %c.neg, <vscale x 2 x i1> %m, i32 %evl)
237 ret <vscale x 2 x float> %fma
; Unmasked vector-vector case, <vscale x 2 x float>: the fpext/fneg/fma chain
; runs under an all-true mask (insertelement + shufflevector splat of i1 true).
; The checks expect the same instruction selection as the masked test, minus
; the v0.t operand.
240 define <vscale x 2 x float> @vmfsac_vv_nxv2f32_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, i32 zeroext %evl) {
241 ; ZVFH-LABEL: vmfsac_vv_nxv2f32_unmasked:
243 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
244 ; ZVFH-NEXT: vfwmsac.vv v10, v8, v9
245 ; ZVFH-NEXT: vmv1r.v v8, v10
248 ; ZVFHMIN-LABEL: vmfsac_vv_nxv2f32_unmasked:
250 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
251 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
252 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
253 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
254 ; ZVFHMIN-NEXT: vfmsub.vv v8, v11, v10
256 %one.ins = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
257 %alltrue = shufflevector <vscale x 2 x i1> %one.ins, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
258 %a.f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %alltrue, i32 %evl)
259 %b.f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %alltrue, i32 %evl)
260 %c.neg = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x i1> %alltrue, i32 %evl)
261 %fma = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %a.f32, <vscale x 2 x float> %b.f32, <vscale x 2 x float> %c.neg, <vscale x 2 x i1> %alltrue, i32 %evl)
262 ret <vscale x 2 x float> %fma
; Masked vector-scalar case, <vscale x 2 x float>: scalar half %b is splatted
; (insertelement + shufflevector), then vp.fma(fpext(%a), fpext(splat %b),
; fneg(%c)) runs with mask %m and length %evl on every VP op.
; With +zvfh the checks expect a masked vfwmsac.vf taking fa0 directly; with
; +zvfhmin they expect the splat to be materialized (fcvt.s.h, vfmv.v.f,
; vfncvt.f.f.w) before the masked widening converts and vfmsub.vv.
; No all-true mask is built in this function: all four VP calls use %m.
265 define <vscale x 2 x float> @vmfsac_vf_nxv2f32(<vscale x 2 x half> %a, half %b, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
266 ; ZVFH-LABEL: vmfsac_vf_nxv2f32:
268 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
269 ; ZVFH-NEXT: vfwmsac.vf v9, fa0, v8, v0.t
270 ; ZVFH-NEXT: vmv1r.v v8, v9
273 ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32:
275 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
276 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
277 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
278 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
279 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
280 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
281 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
282 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t
283 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
284 ; ZVFHMIN-NEXT: vfmsub.vv v8, v10, v9, v0.t
286 %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
287 %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
290 %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
291 %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
292 %negc = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
293 %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %negc, <vscale x 2 x i1> %m, i32 %evl)
294 ret <vscale x 2 x float> %v
; Commuted masked vector-scalar case, <vscale x 2 x float>: the extended splat
; of %b is passed as the first multiplicand of vp.fma and the extended %a as
; the second. The +zvfh checks still expect a masked vfwmsac.vf with fa0.
297 define <vscale x 2 x float> @vmfsac_vf_nxv2f32_commute(<vscale x 2 x half> %a, half %b, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
298 ; ZVFH-LABEL: vmfsac_vf_nxv2f32_commute:
300 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
301 ; ZVFH-NEXT: vfwmsac.vf v9, fa0, v8, v0.t
302 ; ZVFH-NEXT: vmv1r.v v8, v9
305 ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32_commute:
307 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
308 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
309 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
310 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
311 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
312 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
313 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t
314 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11, v0.t
315 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
316 ; ZVFHMIN-NEXT: vfmsub.vv v10, v8, v9, v0.t
317 ; ZVFHMIN-NEXT: vmv.v.v v8, v10
319 %b.ins = insertelement <vscale x 2 x half> poison, half %b, i32 0
320 %b.splat = shufflevector <vscale x 2 x half> %b.ins, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
321 %a.f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
322 %b.f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b.splat, <vscale x 2 x i1> %m, i32 %evl)
323 %c.neg = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
324 %fma = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %b.f32, <vscale x 2 x float> %a.f32, <vscale x 2 x float> %c.neg, <vscale x 2 x i1> %m, i32 %evl)
325 ret <vscale x 2 x float> %fma
; Unmasked vector-scalar case, <vscale x 2 x float>: %b is splatted and every
; VP op takes an all-true mask. The checks expect unmasked vfwmsac.vf (+zvfh)
; or unmasked vfwcvt.f.f.v / vfmsub.vv (+zvfhmin).
328 define <vscale x 2 x float> @vmfsac_vf_nxv2f32_unmasked(<vscale x 2 x half> %a, half %b, <vscale x 2 x float> %c, i32 zeroext %evl) {
329 ; ZVFH-LABEL: vmfsac_vf_nxv2f32_unmasked:
331 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
332 ; ZVFH-NEXT: vfwmsac.vf v9, fa0, v8
333 ; ZVFH-NEXT: vmv1r.v v8, v9
336 ; ZVFHMIN-LABEL: vmfsac_vf_nxv2f32_unmasked:
338 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
339 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
340 ; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
341 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
342 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v10
343 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
344 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
345 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
346 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
347 ; ZVFHMIN-NEXT: vfmsub.vv v8, v10, v9
349 %b.ins = insertelement <vscale x 2 x half> poison, half %b, i32 0
350 %b.splat = shufflevector <vscale x 2 x half> %b.ins, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
351 %one.ins = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
352 %alltrue = shufflevector <vscale x 2 x i1> %one.ins, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
353 %a.f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %alltrue, i32 %evl)
354 %b.f32 = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b.splat, <vscale x 2 x i1> %alltrue, i32 %evl)
355 %c.neg = call <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x i1> %alltrue, i32 %evl)
356 %fma = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %a.f32, <vscale x 2 x float> %b.f32, <vscale x 2 x float> %c.neg, <vscale x 2 x i1> %alltrue, i32 %evl)
357 ret <vscale x 2 x float> %fma
; VP intrinsics referenced by the <vscale x 4 x float> tests that follow:
; fma, fneg, and fpext (half -> float).
360 declare <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32)
361 declare <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
362 declare <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
; Masked vector-vector case, <vscale x 4 x float>:
;   vp.fma(fpext(%a), fpext(%b), fneg(%c)) with mask %m and length %evl.
; The checks expect e16/m1 for the narrow operands; +zvfh folds to a masked
; vfwmsac.vv, +zvfhmin converts first and uses a masked vfmsub.vv at e32/m2,
; then copies the result into v8.
364 define <vscale x 4 x float> @vmfsac_vv_nxv4f32(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
365 ; ZVFH-LABEL: vmfsac_vv_nxv4f32:
367 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
368 ; ZVFH-NEXT: vfwmsac.vv v10, v8, v9, v0.t
369 ; ZVFH-NEXT: vmv2r.v v8, v10
372 ; ZVFHMIN-LABEL: vmfsac_vv_nxv4f32:
374 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
375 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8, v0.t
376 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9, v0.t
377 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
378 ; ZVFHMIN-NEXT: vfmsub.vv v12, v14, v10, v0.t
379 ; ZVFHMIN-NEXT: vmv.v.v v8, v12
381 %a.f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
382 %b.f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
383 %c.neg = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
384 %fma = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %a.f32, <vscale x 4 x float> %b.f32, <vscale x 4 x float> %c.neg, <vscale x 4 x i1> %m, i32 %evl)
385 ret <vscale x 4 x float> %fma
; Unmasked vector-vector case, <vscale x 4 x float>: the fpext/fneg/fma chain
; runs under an all-true mask (insertelement + shufflevector splat of i1 true).
; The checks expect the same selection as the masked test without v0.t.
388 define <vscale x 4 x float> @vmfsac_vv_nxv4f32_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, i32 zeroext %evl) {
389 ; ZVFH-LABEL: vmfsac_vv_nxv4f32_unmasked:
391 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
392 ; ZVFH-NEXT: vfwmsac.vv v10, v8, v9
393 ; ZVFH-NEXT: vmv2r.v v8, v10
396 ; ZVFHMIN-LABEL: vmfsac_vv_nxv4f32_unmasked:
398 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
399 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
400 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
401 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
402 ; ZVFHMIN-NEXT: vfmsub.vv v12, v14, v10
403 ; ZVFHMIN-NEXT: vmv.v.v v8, v12
405 %one.ins = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
406 %alltrue = shufflevector <vscale x 4 x i1> %one.ins, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
407 %a.f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %alltrue, i32 %evl)
408 %b.f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %alltrue, i32 %evl)
409 %c.neg = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x i1> %alltrue, i32 %evl)
410 %fma = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %a.f32, <vscale x 4 x float> %b.f32, <vscale x 4 x float> %c.neg, <vscale x 4 x i1> %alltrue, i32 %evl)
411 ret <vscale x 4 x float> %fma
; Masked vector-scalar case, <vscale x 4 x float>: scalar half %b is splatted
; (insertelement + shufflevector), then vp.fma(fpext(%a), fpext(splat %b),
; fneg(%c)) runs with mask %m and length %evl on every VP op.
; With +zvfh the checks expect a masked vfwmsac.vf taking fa0 directly; with
; +zvfhmin they expect the splat to be materialized (fcvt.s.h, vfmv.v.f,
; vfncvt.f.f.w) before the masked widening converts and vfmsub.vv.
; No all-true mask is built in this function: all four VP calls use %m.
414 define <vscale x 4 x float> @vmfsac_vf_nxv4f32(<vscale x 4 x half> %a, half %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
415 ; ZVFH-LABEL: vmfsac_vf_nxv4f32:
417 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
418 ; ZVFH-NEXT: vfwmsac.vf v10, fa0, v8, v0.t
419 ; ZVFH-NEXT: vmv2r.v v8, v10
422 ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32:
424 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
425 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
426 ; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
427 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
428 ; ZVFHMIN-NEXT: vfncvt.f.f.w v14, v12
429 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
430 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
431 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14, v0.t
432 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
433 ; ZVFHMIN-NEXT: vfmsub.vv v8, v12, v10, v0.t
435 %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
436 %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
439 %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
440 %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
441 %negc = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
442 %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %negc, <vscale x 4 x i1> %m, i32 %evl)
443 ret <vscale x 4 x float> %v
; Commuted masked vector-scalar case, <vscale x 4 x float>: the extended splat
; of %b is passed as the first multiplicand of vp.fma and the extended %a as
; the second. The +zvfh checks still expect a masked vfwmsac.vf with fa0.
446 define <vscale x 4 x float> @vmfsac_vf_nxv4f32_commute(<vscale x 4 x half> %a, half %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
447 ; ZVFH-LABEL: vmfsac_vf_nxv4f32_commute:
449 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
450 ; ZVFH-NEXT: vfwmsac.vf v10, fa0, v8, v0.t
451 ; ZVFH-NEXT: vmv2r.v v8, v10
454 ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_commute:
456 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
457 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
458 ; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
459 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
460 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v12
461 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
462 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t
463 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9, v0.t
464 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
465 ; ZVFHMIN-NEXT: vfmsub.vv v12, v14, v10, v0.t
466 ; ZVFHMIN-NEXT: vmv.v.v v8, v12
468 %b.ins = insertelement <vscale x 4 x half> poison, half %b, i32 0
469 %b.splat = shufflevector <vscale x 4 x half> %b.ins, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
470 %a.f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
471 %b.f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b.splat, <vscale x 4 x i1> %m, i32 %evl)
472 %c.neg = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
473 %fma = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %b.f32, <vscale x 4 x float> %a.f32, <vscale x 4 x float> %c.neg, <vscale x 4 x i1> %m, i32 %evl)
474 ret <vscale x 4 x float> %fma
; Unmasked vector-scalar case, <vscale x 4 x float>: %b is splatted and every
; VP op takes an all-true mask. The checks expect unmasked vfwmsac.vf (+zvfh)
; or unmasked vfwcvt.f.f.v / vfmsub.vv (+zvfhmin).
477 define <vscale x 4 x float> @vmfsac_vf_nxv4f32_unmasked(<vscale x 4 x half> %a, half %b, <vscale x 4 x float> %c, i32 zeroext %evl) {
478 ; ZVFH-LABEL: vmfsac_vf_nxv4f32_unmasked:
480 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
481 ; ZVFH-NEXT: vfwmsac.vf v10, fa0, v8
482 ; ZVFH-NEXT: vmv2r.v v8, v10
485 ; ZVFHMIN-LABEL: vmfsac_vf_nxv4f32_unmasked:
487 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
488 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
489 ; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
490 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
491 ; ZVFHMIN-NEXT: vfncvt.f.f.w v14, v12
492 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
493 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
494 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v14
495 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
496 ; ZVFHMIN-NEXT: vfmsub.vv v8, v12, v10
498 %b.ins = insertelement <vscale x 4 x half> poison, half %b, i32 0
499 %b.splat = shufflevector <vscale x 4 x half> %b.ins, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
500 %one.ins = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
501 %alltrue = shufflevector <vscale x 4 x i1> %one.ins, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
502 %a.f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %alltrue, i32 %evl)
503 %b.f32 = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b.splat, <vscale x 4 x i1> %alltrue, i32 %evl)
504 %c.neg = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x i1> %alltrue, i32 %evl)
505 %fma = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %a.f32, <vscale x 4 x float> %b.f32, <vscale x 4 x float> %c.neg, <vscale x 4 x i1> %alltrue, i32 %evl)
506 ret <vscale x 4 x float> %fma
; VP intrinsics referenced by the <vscale x 8 x float> tests that follow:
; fma, fneg, and fpext (half -> float).
509 declare <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32)
510 declare <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
511 declare <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
; Masked vector-vector case, <vscale x 8 x float>:
;   vp.fma(fpext(%a), fpext(%b), fneg(%c)) with mask %m and length %evl.
; The checks expect e16/m2 for the narrow operands; +zvfh folds to a masked
; vfwmsac.vv, +zvfhmin converts first and uses a masked vfmsub.vv at e32/m4,
; then copies the result into v8.
513 define <vscale x 8 x float> @vmfsac_vv_nxv8f32(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
514 ; ZVFH-LABEL: vmfsac_vv_nxv8f32:
516 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
517 ; ZVFH-NEXT: vfwmsac.vv v12, v8, v10, v0.t
518 ; ZVFH-NEXT: vmv4r.v v8, v12
521 ; ZVFHMIN-LABEL: vmfsac_vv_nxv8f32:
523 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
524 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8, v0.t
525 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10, v0.t
526 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
527 ; ZVFHMIN-NEXT: vfmsub.vv v16, v20, v12, v0.t
528 ; ZVFHMIN-NEXT: vmv.v.v v8, v16
530 %a.f32 = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
531 %b.f32 = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
532 %c.neg = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
533 %fma = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %a.f32, <vscale x 8 x float> %b.f32, <vscale x 8 x float> %c.neg, <vscale x 8 x i1> %m, i32 %evl)
534 ret <vscale x 8 x float> %fma
; Unmasked vector-vector case, <vscale x 8 x float>: the fpext/fneg/fma chain
; runs under an all-true mask (insertelement + shufflevector splat of i1 true).
; The checks expect the same selection as the masked test without v0.t.
537 define <vscale x 8 x float> @vmfsac_vv_nxv8f32_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, i32 zeroext %evl) {
538 ; ZVFH-LABEL: vmfsac_vv_nxv8f32_unmasked:
540 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
541 ; ZVFH-NEXT: vfwmsac.vv v12, v8, v10
542 ; ZVFH-NEXT: vmv4r.v v8, v12
545 ; ZVFHMIN-LABEL: vmfsac_vv_nxv8f32_unmasked:
547 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
548 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v8
549 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
550 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
551 ; ZVFHMIN-NEXT: vfmsub.vv v16, v20, v12
552 ; ZVFHMIN-NEXT: vmv.v.v v8, v16
554 %one.ins = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
555 %alltrue = shufflevector <vscale x 8 x i1> %one.ins, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
556 %a.f32 = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %alltrue, i32 %evl)
557 %b.f32 = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %alltrue, i32 %evl)
558 %c.neg = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %c, <vscale x 8 x i1> %alltrue, i32 %evl)
559 %fma = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %a.f32, <vscale x 8 x float> %b.f32, <vscale x 8 x float> %c.neg, <vscale x 8 x i1> %alltrue, i32 %evl)
560 ret <vscale x 8 x float> %fma
; Masked vector-scalar case, <vscale x 8 x float>: scalar half %b is splatted
; (insertelement + shufflevector), then vp.fma(fpext(%a), fpext(splat %b),
; fneg(%c)) runs with mask %m and length %evl on every VP op.
; With +zvfh the checks expect a masked vfwmsac.vf taking fa0 directly; with
; +zvfhmin they expect the splat to be materialized (fcvt.s.h, vfmv.v.f,
; vfncvt.f.f.w) before the masked widening converts and vfmsub.vv.
; No all-true mask is built in this function: all four VP calls use %m.
563 define <vscale x 8 x float> @vmfsac_vf_nxv8f32(<vscale x 8 x half> %a, half %b, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
564 ; ZVFH-LABEL: vmfsac_vf_nxv8f32:
566 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
567 ; ZVFH-NEXT: vfwmsac.vf v12, fa0, v8, v0.t
568 ; ZVFH-NEXT: vmv4r.v v8, v12
571 ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32:
573 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
574 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
575 ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
576 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
577 ; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v16
578 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
579 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
580 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20, v0.t
581 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
582 ; ZVFHMIN-NEXT: vfmsub.vv v8, v16, v12, v0.t
584 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
585 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
588 %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
589 %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
590 %negc = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
591 %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %negc, <vscale x 8 x i1> %m, i32 %evl)
592 ret <vscale x 8 x float> %v
; Commuted form of the masked vector-scalar case for nxv8: identical to
; @vmfsac_vf_nxv8f32 except that the splatted scalar is the first vp.fma
; multiplicand and %a is the second. Every VP call is predicated on %m and
; uses the same %evl.
; With +zvfh the expected code is the same masked vfwmsac.vf as the
; non-commuted test; with +zvfhmin the scalar is converted and splatted at
; e32, narrowed back to e16, and then both operands are widened and combined
; by a masked vfmsub.vv.
595 define <vscale x 8 x float> @vmfsac_vf_nxv8f32_commute(<vscale x 8 x half> %a, half %b, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
596 ; ZVFH-LABEL: vmfsac_vf_nxv8f32_commute:
598 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
599 ; ZVFH-NEXT: vfwmsac.vf v12, fa0, v8, v0.t
600 ; ZVFH-NEXT: vmv4r.v v8, v12
603 ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_commute:
605 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
606 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
607 ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
608 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
609 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v16
610 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
611 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
612 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10, v0.t
613 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
614 ; ZVFHMIN-NEXT: vfmsub.vv v16, v20, v12, v0.t
615 ; ZVFHMIN-NEXT: vmv.v.v v8, v16
; Broadcast the scalar %b into every lane of %vb.
617 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
618 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
619 %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
620 %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
621 %negc = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
; Multiplicands are deliberately swapped here: %vbext first, %aext second.
622 %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vbext, <vscale x 8 x float> %aext, <vscale x 8 x float> %negc, <vscale x 8 x i1> %m, i32 %evl)
623 ret <vscale x 8 x float> %v
; Unmasked vector-scalar case for nxv8: the scalar half %b is splatted to a
; vector, both multiplicands are widened to float with vp.fpext, %c is negated
; with vp.fneg, and the result is vp.fma(fpext(%a), fpext(splat(%b)), fneg(%c)).
; Every VP call uses the all-true mask built in the body and the same %evl.
; With +zvfh the expected code is a single unmasked vfwmsac.vf taking %b from
; fa0; with +zvfhmin the scalar is converted and splatted at e32, narrowed back
; to e16, and then both operands are widened and combined by an unmasked
; vfmsub.vv.
626 define <vscale x 8 x float> @vmfsac_vf_nxv8f32_unmasked(<vscale x 8 x half> %a, half %b, <vscale x 8 x float> %c, i32 zeroext %evl) {
627 ; ZVFH-LABEL: vmfsac_vf_nxv8f32_unmasked:
629 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
630 ; ZVFH-NEXT: vfwmsac.vf v12, fa0, v8
631 ; ZVFH-NEXT: vmv4r.v v8, v12
634 ; ZVFHMIN-LABEL: vmfsac_vf_nxv8f32_unmasked:
636 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
637 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
638 ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
639 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
640 ; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v16
641 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
642 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
643 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
644 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
645 ; ZVFHMIN-NEXT: vfmsub.vv v8, v16, v12
; Broadcast the scalar %b into every lane of %vb.
647 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
648 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
; Splat i1 true across all lanes to form the mask passed to the VP calls.
649 %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
650 %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
; Widen both multiplicands, negate the addend, then fuse.
651 %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %allones, i32 %evl)
652 %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> %allones, i32 %evl)
653 %negc = call <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float> %c, <vscale x 8 x i1> %allones, i32 %evl)
654 %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %negc, <vscale x 8 x i1> %allones, i32 %evl)
655 ret <vscale x 8 x float> %v