; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; VP intrinsic declarations shared by the nxv1f32 tests below.
declare <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1>, <vscale x 1 x float>, <vscale x 1 x float>, i32)
; Masked widening multiply-add, vector-vector form. The vp.fpext of both f16
; operands folds with vp.fma into a single vfwmacc.vv under ZVFH.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}
; Unmasked (all-ones mask) variant of the vv widening multiply-add.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}
; Tail-undisturbed variant, vp.merge on the fma result selects a tu/mu
; vfwmacc under ZVFH.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v10, v11, v8, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}
; FIXME: Support this case?
; Masked fma merged with the same mask; ZVFHMIN currently emits a separate
; vmerge instead of folding into the multiply-add.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_masked__tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vmerge.vvm v10, v10, v8, v0
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}
; Unmasked fma with an all-ones vp.merge, lowers to a tu, ma vfwmacc.
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vfmacc.vv v10, v11, v8
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}
; Vector-scalar form, the splatted f16 scalar folds into vfwmacc.vf under
; ZVFH; ZVFHMIN round-trips the scalar through f32 to materialize the splat.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}
; Same as vfmacc_vf_nxv1f32 with the fma multiplicands commuted.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v10, v8, v9, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}
; Unmasked vector-scalar form.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 1 x float> %v
}
; Vector-scalar form with tail-undisturbed vp.merge.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v9, v10, v8, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}
; Commuted multiplicands plus tail-undisturbed vp.merge.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v9, v8, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}
; Unmasked vector-scalar form with all-ones vp.merge.
define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vfmacc.vv v9, v10, v8
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> splat (i1 -1), <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}
; VP intrinsic declarations shared by the nxv2f32 tests below.
declare <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
; Masked vv widening multiply-add at mf2/m1.
define <vscale x 2 x float> @vfmacc_vv_nxv2f32(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}
; Unmasked vv widening multiply-add at mf2/m1.
define <vscale x 2 x float> @vfmacc_vv_nxv2f32_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x float> %v
}
; Masked vector-scalar widening multiply-add at mf2/m1.
define <vscale x 2 x float> @vfmacc_vf_nxv2f32(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}
; Unmasked vector-scalar widening multiply-add at mf2/m1.
define <vscale x 2 x float> @vfmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 2 x float> %v
}
; VP intrinsic declarations shared by the nxv4f32 tests below.
declare <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.select.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
; Masked vv widening multiply-add at m1/m2.
define <vscale x 4 x float> @vfmacc_vv_nxv4f32(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v14, v10, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v12
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}
; Unmasked vv widening multiply-add at m1/m2.
define <vscale x 4 x float> @vfmacc_vv_nxv4f32_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v14, v10
; ZVFHMIN-NEXT:    vmv.v.v v8, v12
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x float> %v
}
; Masked vector-scalar widening multiply-add at m1/m2.
define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v10, fa0, v8, v0.t
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v14, v12
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}
; Unmasked vector-scalar widening multiply-add at m1/m2.
define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v10, fa0, v8
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v14, v12
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 4 x float> %v
}
; VP intrinsic declarations shared by the nxv8f32 tests below.
declare <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.merge.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.select.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
; Masked vv widening multiply-add at m2/m4.
define <vscale x 8 x float> @vfmacc_vv_nxv8f32(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v12, v8, v10, v0.t
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v20, v12, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v16
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}
; Unmasked vv widening multiply-add at m2/m4.
define <vscale x 8 x float> @vfmacc_vv_nxv8f32_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v12, v8, v10
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v20, v12
; ZVFHMIN-NEXT:    vmv.v.v v8, v16
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
  ret <vscale x 8 x float> %v
}
; Masked vector-scalar widening multiply-add at m2/m4.
define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv8f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v12, fa0, v8, v0.t
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v20, v16
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}
; Unmasked scalar-operand form (mask = splat true): same lowering as the
; masked variant above, minus the v0.t predication on the converts/FMA.
615 define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, i32 zeroext %evl) {
616 ; ZVFH-LABEL: vfmacc_vf_nxv8f32_unmasked:
618 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
619 ; ZVFH-NEXT: vfwmacc.vf v12, fa0, v8
620 ; ZVFH-NEXT: vmv4r.v v8, v12
623 ; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32_unmasked:
625 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
626 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
627 ; ZVFHMIN-NEXT: vfmv.v.f v16, fa5
628 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
629 ; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v16
630 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
631 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
632 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20
633 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
634 ; ZVFHMIN-NEXT: vfmadd.vv v8, v16, v12
636 %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
637 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
638 %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
639 %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
640 %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
641 ret <vscale x 8 x float> %v
644 declare <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x i1>, i32)
645 declare <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
646 declare <vscale x 16 x float> @llvm.vp.merge.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
647 declare <vscale x 16 x float> @llvm.vp.select.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
648 declare <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)
; Masked f16->f32 widening FMA at the largest f32 LMUL (m8 result).
; ZVFHMIN's expansion needs three m8 values live at once, so it spills
; the accumulator v16 to the stack (vs8r.v/vl8r.v with CFI bookkeeping)
; around the two widening converts — presumably pure register pressure;
; note the expansion, not a correctness concern.
650 define <vscale x 16 x float> @vfmacc_vv_nxv16f32(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
651 ; ZVFH-LABEL: vfmacc_vv_nxv16f32:
653 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
654 ; ZVFH-NEXT: vfwmacc.vv v16, v8, v12, v0.t
655 ; ZVFH-NEXT: vmv8r.v v8, v16
658 ; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32:
660 ; ZVFHMIN-NEXT: addi sp, sp, -16
661 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16
662 ; ZVFHMIN-NEXT: csrr a1, vlenb
663 ; ZVFHMIN-NEXT: slli a1, a1, 3
664 ; ZVFHMIN-NEXT: sub sp, sp, a1
665 ; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
666 ; ZVFHMIN-NEXT: addi a1, sp, 16
667 ; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
668 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
669 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t
670 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12, v0.t
671 ; ZVFHMIN-NEXT: addi a0, sp, 16
672 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
673 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
674 ; ZVFHMIN-NEXT: vfmadd.vv v24, v16, v8, v0.t
675 ; ZVFHMIN-NEXT: vmv.v.v v8, v24
676 ; ZVFHMIN-NEXT: csrr a0, vlenb
677 ; ZVFHMIN-NEXT: slli a0, a0, 3
678 ; ZVFHMIN-NEXT: add sp, sp, a0
679 ; ZVFHMIN-NEXT: addi sp, sp, 16
681 %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> %m, i32 %evl)
682 %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
683 %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
684 ret <vscale x 16 x float> %v
; Unmasked nxv16f32 case. With no mask operand, v0 is a free register,
; so ZVFHMIN's expansion avoids the stack spill the masked version needs.
687 define <vscale x 16 x float> @vfmacc_vv_nxv16f32_unmasked(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, i32 zeroext %evl) {
688 ; ZVFH-LABEL: vfmacc_vv_nxv16f32_unmasked:
690 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
691 ; ZVFH-NEXT: vfwmacc.vv v16, v8, v12
692 ; ZVFH-NEXT: vmv8r.v v8, v16
695 ; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32_unmasked:
697 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
698 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v0, v8
699 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12
700 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
701 ; ZVFHMIN-NEXT: vfmadd.vv v24, v0, v16
702 ; ZVFHMIN-NEXT: vmv.v.v v8, v24
704 %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
705 %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
706 %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
707 ret <vscale x 16 x float> %v
; Masked scalar-operand form at nxv16: ZVFH -> vfwmacc.vf; ZVFHMIN
; splats the scalar at e32/m8 and narrows to f16 before the widening
; converts and the e32 vfmadd.vv.
710 define <vscale x 16 x float> @vfmacc_vf_nxv16f32(<vscale x 16 x half> %va, half %b, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 zeroext %evl) {
711 ; ZVFH-LABEL: vfmacc_vf_nxv16f32:
713 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
714 ; ZVFH-NEXT: vfwmacc.vf v16, fa0, v8, v0.t
715 ; ZVFH-NEXT: vmv8r.v v8, v16
718 ; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32:
720 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
721 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
722 ; ZVFHMIN-NEXT: vfmv.v.f v24, fa5
723 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
724 ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24
725 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
726 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t
727 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4, v0.t
728 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
729 ; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16, v0.t
731 %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
732 %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
733 %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
734 %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
735 %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 %evl)
736 ret <vscale x 16 x float> %v
; Unmasked counterpart of vfmacc_vf_nxv16f32: identical lowering without
; the v0.t predication.
739 define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x half> %va, half %b, <vscale x 16 x float> %vc, i32 zeroext %evl) {
740 ; ZVFH-LABEL: vfmacc_vf_nxv16f32_unmasked:
742 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
743 ; ZVFH-NEXT: vfwmacc.vf v16, fa0, v8
744 ; ZVFH-NEXT: vmv8r.v v8, v16
747 ; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32_unmasked:
749 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
750 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma
751 ; ZVFHMIN-NEXT: vfmv.v.f v24, fa5
752 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
753 ; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24
754 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
755 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8
756 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4
757 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
758 ; ZVFHMIN-NEXT: vfmadd.vv v8, v24, v16
760 %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
761 %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
762 %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
763 %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
764 %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> splat (i1 -1), i32 %evl)
765 ret <vscale x 16 x float> %v
768 declare <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32)
769 declare <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
770 declare <vscale x 1 x double> @llvm.vp.merge.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32)
771 declare <vscale x 1 x double> @llvm.vp.select.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32)
772 declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
; f32->f64 widening FMA: no f16 involved, so Zvfh and Zvfhmin lower
; identically (shared CHECK prefix) to a single masked vfwmacc.vv.
774 define <vscale x 1 x double> @vfmacc_vv_nxv1f64(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
775 ; CHECK-LABEL: vfmacc_vv_nxv1f64:
777 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
778 ; CHECK-NEXT: vfwmacc.vv v10, v8, v9, v0.t
779 ; CHECK-NEXT: vmv1r.v v8, v10
781 %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> %m, i32 %evl)
782 %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl)
783 %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
784 ret <vscale x 1 x double> %v
; Unmasked f32->f64 case: single vfwmacc.vv, no v0.t.
787 define <vscale x 1 x double> @vfmacc_vv_nxv1f64_unmasked(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, i32 zeroext %evl) {
788 ; CHECK-LABEL: vfmacc_vv_nxv1f64_unmasked:
790 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
791 ; CHECK-NEXT: vfwmacc.vv v10, v8, v9
792 ; CHECK-NEXT: vmv1r.v v8, v10
794 %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
795 %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
796 %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
797 ret <vscale x 1 x double> %v
; Masked scalar f32 operand -> f64 FMA: folds to vfwmacc.vf with the
; scalar kept in fa0 (splat never materialized in a vector register).
800 define <vscale x 1 x double> @vfmacc_vf_nxv1f64(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
801 ; CHECK-LABEL: vfmacc_vf_nxv1f64:
803 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
804 ; CHECK-NEXT: vfwmacc.vf v9, fa0, v8, v0.t
805 ; CHECK-NEXT: vmv1r.v v8, v9
807 %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
808 %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
809 %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
810 %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl)
811 %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 %evl)
812 ret <vscale x 1 x double> %v
; Unmasked scalar f32 operand -> f64 FMA: vfwmacc.vf without v0.t.
815 define <vscale x 1 x double> @vfmacc_vf_nxv1f64_unmasked(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, i32 zeroext %evl) {
816 ; CHECK-LABEL: vfmacc_vf_nxv1f64_unmasked:
818 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
819 ; CHECK-NEXT: vfwmacc.vf v9, fa0, v8
820 ; CHECK-NEXT: vmv1r.v v8, v9
822 %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
823 %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
824 %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
825 %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
826 %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
827 ret <vscale x 1 x double> %v
830 declare <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, i32)
831 declare <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
832 declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
833 declare <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
834 declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
; Masked f32->f64 widening FMA, nxv2 (e32/m1 source, m2 result).
836 define <vscale x 2 x double> @vfmacc_vv_nxv2f64(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
837 ; CHECK-LABEL: vfmacc_vv_nxv2f64:
839 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
840 ; CHECK-NEXT: vfwmacc.vv v10, v8, v9, v0.t
841 ; CHECK-NEXT: vmv2r.v v8, v10
843 %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %evl)
844 %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl)
845 %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
846 ret <vscale x 2 x double> %v
; Unmasked f32->f64 widening FMA, nxv2.
849 define <vscale x 2 x double> @vfmacc_vv_nxv2f64_unmasked(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, i32 zeroext %evl) {
850 ; CHECK-LABEL: vfmacc_vv_nxv2f64_unmasked:
852 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
853 ; CHECK-NEXT: vfwmacc.vv v10, v8, v9
854 ; CHECK-NEXT: vmv2r.v v8, v10
856 %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
857 %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
858 %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
859 ret <vscale x 2 x double> %v
; Masked scalar f32 operand -> nxv2f64 FMA: folds to vfwmacc.vf.
862 define <vscale x 2 x double> @vfmacc_vf_nxv2f64(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
863 ; CHECK-LABEL: vfmacc_vf_nxv2f64:
865 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
866 ; CHECK-NEXT: vfwmacc.vf v10, fa0, v8, v0.t
867 ; CHECK-NEXT: vmv2r.v v8, v10
869 %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
870 %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
871 %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
872 %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl)
873 %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 %evl)
874 ret <vscale x 2 x double> %v
; Unmasked scalar f32 operand -> nxv2f64 FMA.
877 define <vscale x 2 x double> @vfmacc_vf_nxv2f64_unmasked(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, i32 zeroext %evl) {
878 ; CHECK-LABEL: vfmacc_vf_nxv2f64_unmasked:
880 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
881 ; CHECK-NEXT: vfwmacc.vf v10, fa0, v8
882 ; CHECK-NEXT: vmv2r.v v8, v10
884 %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
885 %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
886 %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
887 %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
888 %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
889 ret <vscale x 2 x double> %v
892 declare <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x i1>, i32)
893 declare <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
894 declare <vscale x 4 x double> @llvm.vp.merge.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32)
895 declare <vscale x 4 x double> @llvm.vp.select.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32)
896 declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
; Masked f32->f64 widening FMA, nxv4 (e32/m2 source, m4 result).
898 define <vscale x 4 x double> @vfmacc_vv_nxv4f64(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
899 ; CHECK-LABEL: vfmacc_vv_nxv4f64:
901 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
902 ; CHECK-NEXT: vfwmacc.vv v12, v8, v10, v0.t
903 ; CHECK-NEXT: vmv4r.v v8, v12
905 %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %m, i32 %evl)
906 %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl)
907 %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
908 ret <vscale x 4 x double> %v
; Unmasked f32->f64 widening FMA, nxv4.
911 define <vscale x 4 x double> @vfmacc_vv_nxv4f64_unmasked(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, i32 zeroext %evl) {
912 ; CHECK-LABEL: vfmacc_vv_nxv4f64_unmasked:
914 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
915 ; CHECK-NEXT: vfwmacc.vv v12, v8, v10
916 ; CHECK-NEXT: vmv4r.v v8, v12
918 %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
919 %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
920 %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
921 ret <vscale x 4 x double> %v
; Masked scalar f32 operand -> nxv4f64 FMA: folds to vfwmacc.vf.
924 define <vscale x 4 x double> @vfmacc_vf_nxv4f64(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
925 ; CHECK-LABEL: vfmacc_vf_nxv4f64:
927 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
928 ; CHECK-NEXT: vfwmacc.vf v12, fa0, v8, v0.t
929 ; CHECK-NEXT: vmv4r.v v8, v12
931 %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
932 %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
933 %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
934 %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl)
935 %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 %evl)
936 ret <vscale x 4 x double> %v
; Unmasked scalar f32 operand -> nxv4f64 FMA.
939 define <vscale x 4 x double> @vfmacc_vf_nxv4f64_unmasked(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, i32 zeroext %evl) {
940 ; CHECK-LABEL: vfmacc_vf_nxv4f64_unmasked:
942 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
943 ; CHECK-NEXT: vfwmacc.vf v12, fa0, v8
944 ; CHECK-NEXT: vmv4r.v v8, v12
946 %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
947 %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
948 %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
949 %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
950 %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
951 ret <vscale x 4 x double> %v
954 declare <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x i1>, i32)
955 declare <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
956 declare <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32)
957 declare <vscale x 8 x double> @llvm.vp.select.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32)
958 declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
; Masked f32->f64 widening FMA, nxv8 (e32/m4 source, m8 result).
960 define <vscale x 8 x double> @vfmacc_vv_nxv8f64(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
961 ; CHECK-LABEL: vfmacc_vv_nxv8f64:
963 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
964 ; CHECK-NEXT: vfwmacc.vv v16, v8, v12, v0.t
965 ; CHECK-NEXT: vmv8r.v v8, v16
967 %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> %m, i32 %evl)
968 %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl)
969 %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
970 ret <vscale x 8 x double> %v
; Unmasked f32->f64 widening FMA, nxv8.
973 define <vscale x 8 x double> @vfmacc_vv_nxv8f64_unmasked(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, i32 zeroext %evl) {
974 ; CHECK-LABEL: vfmacc_vv_nxv8f64_unmasked:
976 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
977 ; CHECK-NEXT: vfwmacc.vv v16, v8, v12
978 ; CHECK-NEXT: vmv8r.v v8, v16
980 %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
981 %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
982 %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
983 ret <vscale x 8 x double> %v
; Masked scalar f32 operand -> nxv8f64 FMA: folds to vfwmacc.vf.
986 define <vscale x 8 x double> @vfmacc_vf_nxv8f64(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
987 ; CHECK-LABEL: vfmacc_vf_nxv8f64:
989 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
990 ; CHECK-NEXT: vfwmacc.vf v16, fa0, v8, v0.t
991 ; CHECK-NEXT: vmv8r.v v8, v16
993 %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
994 %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
995 %vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
996 %vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl)
997 %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 %evl)
998 ret <vscale x 8 x double> %v
; Unmasked scalar f32 operand -> nxv8f64 FMA.
1001 define <vscale x 8 x double> @vfmacc_vf_nxv8f64_unmasked(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, i32 zeroext %evl) {
1002 ; CHECK-LABEL: vfmacc_vf_nxv8f64_unmasked:
1004 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
1005 ; CHECK-NEXT: vfwmacc.vf v16, fa0, v8
1006 ; CHECK-NEXT: vmv8r.v v8, v16
1008 %elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
1009 %vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
1010 %vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1011 %vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1012 %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x double> %vc, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1013 ret <vscale x 8 x double> %v
1016 declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
; Double-width extend f16->f64: a single widening op only covers one
; step, so the operands are vfwcvt'ed to f32 and the final f32->f64
; widening is folded into vfwmacc.vv. Masked variant.
1018 define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1019 ; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16:
1021 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
1022 ; CHECK-NEXT: vfwcvt.f.f.v v11, v8, v0.t
1023 ; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t
1024 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1025 ; CHECK-NEXT: vfwmacc.vv v10, v11, v8, v0.t
1026 ; CHECK-NEXT: vmv1r.v v8, v10
1028 %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
1029 %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
1030 %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
1031 ret <vscale x 1 x double> %v
; Unmasked f16->f64 case: same vfwcvt + vfwmacc.vv shape without v0.t.
1034 define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, i32 zeroext %evl) {
1035 ; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16_unmasked:
1037 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
1038 ; CHECK-NEXT: vfwcvt.f.f.v v11, v8
1039 ; CHECK-NEXT: vfwcvt.f.f.v v8, v9
1040 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1041 ; CHECK-NEXT: vfwmacc.vv v10, v11, v8
1042 ; CHECK-NEXT: vmv1r.v v8, v10
1044 %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1045 %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1046 %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1047 ret <vscale x 1 x double> %v
1050 declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
; Masked f16->f64, nxv2: vfwcvt to f32 then widening vfwmacc.vv to f64.
1052 define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1053 ; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16:
1055 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
1056 ; CHECK-NEXT: vfwcvt.f.f.v v12, v8, v0.t
1057 ; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t
1058 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1059 ; CHECK-NEXT: vfwmacc.vv v10, v12, v8, v0.t
1060 ; CHECK-NEXT: vmv2r.v v8, v10
1062 %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
1063 %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
1064 %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
1065 ret <vscale x 2 x double> %v
; Unmasked f16->f64, nxv2.
1068 define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, i32 zeroext %evl) {
1069 ; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16_unmasked:
1071 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
1072 ; CHECK-NEXT: vfwcvt.f.f.v v12, v8
1073 ; CHECK-NEXT: vfwcvt.f.f.v v8, v9
1074 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1075 ; CHECK-NEXT: vfwmacc.vv v10, v12, v8
1076 ; CHECK-NEXT: vmv2r.v v8, v10
1078 %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1079 %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1080 %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> splat (i1 -1), i32 %evl)
1081 ret <vscale x 2 x double> %v
1084 declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)
; Masked f16->f64, nxv4: vfwcvt to f32 then widening vfwmacc.vv to f64.
1086 define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1087 ; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16:
1089 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
1090 ; CHECK-NEXT: vfwcvt.f.f.v v10, v8, v0.t
1091 ; CHECK-NEXT: vfwcvt.f.f.v v16, v9, v0.t
1092 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1093 ; CHECK-NEXT: vfwmacc.vv v12, v10, v16, v0.t
1094 ; CHECK-NEXT: vmv4r.v v8, v12
1096 %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
1097 %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
1098 %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
1099 ret <vscale x 4 x double> %v
; Unmasked f16->f64, nxv4.
1102 define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, i32 zeroext %evl) {
1103 ; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16_unmasked:
1105 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
1106 ; CHECK-NEXT: vfwcvt.f.f.v v10, v8
1107 ; CHECK-NEXT: vfwcvt.f.f.v v16, v9
1108 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1109 ; CHECK-NEXT: vfwmacc.vv v12, v10, v16
1110 ; CHECK-NEXT: vmv4r.v v8, v12
1112 %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1113 %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1114 %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> splat (i1 -1), i32 %evl)
1115 ret <vscale x 4 x double> %v
1118 declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)
; Masked nxv8 f16->f64 widening FMA: same two-step widening as the nxv4 case
; but at the next register-group size (e16/m2 for the vfwcvt step, e32/m4 for
; the folded vfwmacc.vv), per the CHECK lines.
1120 define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1121 ; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16:
1123 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
1124 ; CHECK-NEXT: vfwcvt.f.f.v v12, v8, v0.t
1125 ; CHECK-NEXT: vfwcvt.f.f.v v24, v10, v0.t
1126 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1127 ; CHECK-NEXT: vfwmacc.vv v16, v12, v24, v0.t
1128 ; CHECK-NEXT: vmv8r.v v8, v16
1130 %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
1131 %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
1132 %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
1133 ret <vscale x 8 x double> %v
; Unmasked nxv8 f16->f64 variant: all-ones mask, so the expected sequence
; matches the masked version above but without v0.t operands.
1136 define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, i32 zeroext %evl) {
1137 ; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16_unmasked:
1139 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
1140 ; CHECK-NEXT: vfwcvt.f.f.v v12, v8
1141 ; CHECK-NEXT: vfwcvt.f.f.v v24, v10
1142 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1143 ; CHECK-NEXT: vfwmacc.vv v16, v12, v24
1144 ; CHECK-NEXT: vmv8r.v v8, v16
1146 %aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1147 %bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1148 %v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> splat (i1 -1), i32 %evl)
1149 ret <vscale x 8 x double> %v
; Squared case: one fpext result (%aext) used as BOTH multiplicands of the
; vp.fma. With zvfh this still folds into vfwmacc.vv with a repeated source
; register (v8, v8); with zvfhmin only a single vfwcvt.f.f.v is emitted for
; the shared operand before the masked vfmadd.vv, per the CHECK lines.
; (%b is intentionally unused — the test squares %a.)
1152 define <vscale x 1 x float> @vfmacc_squared_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1153 ; ZVFH-LABEL: vfmacc_squared_nxv1f32:
1155 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
1156 ; ZVFH-NEXT: vfwmacc.vv v10, v8, v8, v0.t
1157 ; ZVFH-NEXT: vmv1r.v v8, v10
1160 ; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32:
1162 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
1163 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
1164 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1165 ; ZVFHMIN-NEXT: vfmadd.vv v9, v9, v10, v0.t
1166 ; ZVFHMIN-NEXT: vmv1r.v v8, v9
1168 %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
1169 %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
1170 ret <vscale x 1 x float> %v
; Unmasked squared case: same repeated-operand fold as above, with an
; all-ones mask selecting the unmasked instruction forms (no v0.t).
; (%b is intentionally unused — the test squares %a.)
1173 define <vscale x 1 x float> @vfmacc_squared_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
1174 ; ZVFH-LABEL: vfmacc_squared_nxv1f32_unmasked:
1176 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
1177 ; ZVFH-NEXT: vfwmacc.vv v10, v8, v8
1178 ; ZVFH-NEXT: vmv1r.v v8, v10
1181 ; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32_unmasked:
1183 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
1184 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
1185 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
1186 ; ZVFHMIN-NEXT: vfmadd.vv v9, v9, v10
1187 ; ZVFHMIN-NEXT: vmv1r.v v8, v9
1189 %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1190 %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> splat (i1 -1), i32 %evl)
1191 ret <vscale x 1 x float> %v