; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
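
; These tests check that a vp.fpext of the multiplicands feeding a vp.fma is
; folded into the widening FMA instructions (vfwmacc.vv / vfwmacc.vf) under
; ZVFH, while ZVFHMIN, which only provides f16<->f32 conversions, instead
; emits vfwcvt.f.f.v conversions followed by a non-widening vfmadd/vfmacc.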

declare <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1>, <vscale x 1 x float>, <vscale x 1 x float>, i32)

define <vscale x 1 x float> @vfmacc_vv_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vv_nxv1f32_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v10, v11, v8, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

; FIXME: Support this case?
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_masked__tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vmerge.vvm v10, v10, v8, v0
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vfmacc.vv v10, v11, v8
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v11, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v10, v8, v9, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v9, v11, v8, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v9, v8, v11, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vfmacc.vv v9, v11, v8
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

declare <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vfmacc_vv_nxv2f32(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vv_nxv2f32_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vf_nxv2f32(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v10, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x float> %v
}

declare <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.select.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vfmacc_vv_nxv4f32(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v14, v10, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v12
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vv_nxv4f32_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v14, v10
; ZVFHMIN-NEXT:    vmv.v.v v8, v12
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v10, fa0, v8, v0.t
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v12, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v10, fa0, v8
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v12, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v14, v10
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x float> %v
}

declare <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.merge.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.select.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vfmacc_vv_nxv8f32(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v12, v8, v10, v0.t
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v20, v12, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v16
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vv_nxv8f32_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v12, v8, v10
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v20, v12
; ZVFHMIN-NEXT:    vmv.v.v v8, v16
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv8f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v12, fa0, v8, v0.t
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v16, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v20, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv8f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v12, fa0, v8
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v16, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v20, v12
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x float> %v
}

declare <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x float> @llvm.vp.merge.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
declare <vscale x 16 x float> @llvm.vp.select.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
declare <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)

define <vscale x 16 x float> @vfmacc_vv_nxv16f32(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv16f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v16, v8, v12, v0.t
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 3
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v24
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_def_cfa sp, 16
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 0
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> %m, i32 %evl)
  %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfmacc_vv_nxv16f32_unmasked(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv16f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v16, v8, v12
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT:    vmv.v.v v8, v24
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfmacc_vf_nxv16f32(<vscale x 16 x half> %va, half %b, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv16f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v16, fa0, v8, v0.t
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v4, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
  %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x half> %va, half %b, <vscale x 16 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv16f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v16, fa0, v8
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.h a1, fa0
; ZVFHMIN-NEXT:    vsetvli a2, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vmv.v.x v24, a1
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v0, v16
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 16 x float> %v
}

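; For f32 -> f64 the widening FMA is available regardless of the half-precision
; configuration, so the tests below share the common CHECK prefix.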
declare <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x double> @llvm.vp.merge.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32)
declare <vscale x 1 x double> @llvm.vp.select.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32)
declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vfmacc_vv_nxv1f64(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vv_nxv1f64_unmasked(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vf_nxv1f64(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vf_nxv1f64_unmasked(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v9, fa0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfmacc_vv_nxv2f64(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vv_nxv2f64_unmasked(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vf_nxv2f64(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vf v10, fa0, v8, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
  %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vf_nxv2f64_unmasked(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vf v10, fa0, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x double> @llvm.vp.merge.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32)
declare <vscale x 4 x double> @llvm.vp.select.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32)
declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vfmacc_vv_nxv4f64(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vv_nxv4f64_unmasked(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v8, v10
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vf_nxv4f64(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v12, fa0, v8, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
  %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vf_nxv4f64_unmasked(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v12, fa0, v8
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32)
declare <vscale x 8 x double> @llvm.vp.select.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32)
declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vfmacc_vv_nxv8f64(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vv_nxv8f64_unmasked(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vv v16, v8, v12
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vf_nxv8f64(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vf v16, fa0, v8, v0.t
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
  %vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vf_nxv8f64_unmasked(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv8f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT:    vfwmacc.vf v16, fa0, v8
; CHECK-NEXT:    vmv8r.v v8, v16
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x double> %vc, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x double> %v
}

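; Widening two steps (f16 -> f64) still uses one widening FMA: the operands
; are first extended to f32 with vfwcvt.f.f.v, then the f32 -> f64 vfwmacc.vv
; performs the final widening multiply-accumulate.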
declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v11, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v11, v8
; CHECK-NEXT: vfwcvt.f.f.v v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwmacc.vv v10, v11, v8
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8, v0.t
; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwmacc.vv v10, v12, v8, v0.t
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8
; CHECK-NEXT: vfwcvt.f.f.v v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwmacc.vv v10, v12, v8
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; CHECK-NEXT: vfwcvt.f.f.v v16, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwmacc.vv v12, v10, v16, v0.t
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v10, v8
; CHECK-NEXT: vfwcvt.f.f.v v16, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwmacc.vv v12, v10, v16
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8, v0.t
; CHECK-NEXT: vfwcvt.f.f.v v24, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfwmacc.vv v16, v12, v24, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8
; CHECK-NEXT: vfwcvt.f.f.v v24, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfwmacc.vv v16, v12, v24
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
  %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x double> %v
}

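; In the squared tests the same extended value feeds both multiplicands. With
; Zvfh a single vfwmacc.vv can use v8 for both inputs; with only Zvfhmin there
; is no f16 multiply-add, so the operand is converted once and a plain
; vfmadd.vv at e32 is expected instead.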
define <vscale x 1 x float> @vfmacc_squared_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_squared_nxv1f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v8, v0.t
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v9, v9, v10, v0.t
; ZVFHMIN-NEXT: vmv1r.v v8, v9
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_squared_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_squared_nxv1f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v8
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v9, v9, v10
; ZVFHMIN-NEXT: vmv1r.v v8, v9
; ZVFHMIN-NEXT: ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}