; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
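
; These tests check that a vector-predicated llvm.vp.fma whose multiplicands are
; produced by llvm.vp.fpext is combined into the widening vfwmacc.vv/vfwmacc.vf
; forms, and that the ZVFHMIN configurations, which lack vector f16 arithmetic,
; instead widen the f16 operands with vfwcvt.f.f.v before using vfmadd/vfmacc.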

declare <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fneg.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1>, <vscale x 1 x float>, <vscale x 1 x float>, i32)

define <vscale x 1 x float> @vfmacc_vv_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT:    ret
  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %allones, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %allones, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %allones, i32 %evl)
  ret <vscale x 1 x float> %v
}
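
; The *_tu tests additionally merge the FMA result into the addend under the same
; EVL via llvm.vp.merge, which should fold into a single multiply-accumulate using
; a tail-undisturbed (tu) vsetvli policy where supported (see the FIXME below for
; the masked case).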

define <vscale x 1 x float> @vfmacc_vv_nxv1f32_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v10, v11, v8, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %allones, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %allones, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %allones, i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

; FIXME: Support this case?
define <vscale x 1 x float> @vfmacc_vv_nxv1f32_masked__tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vmv1r.v v11, v10
; ZVFH-NEXT:    vfwmacc.vv v11, v8, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFH-NEXT:    vmerge.vvm v10, v10, v11, v0
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_masked__tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vmerge.vvm v10, v10, v8, v0
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vv_nxv1f32_unmasked_tu(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv1f32_unmasked_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vfmacc.vv v10, v11, v8
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %allones, i32 %evl)
  %bext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %allones, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %bext, <vscale x 1 x float> %c, <vscale x 1 x i1> %allones, i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %allones, <vscale x 1 x float> %v, <vscale x 1 x float> %c, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v10, v8, v9, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v10
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %allones, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %allones, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %allones, i32 %evl)
  ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v9, v10, v8, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %allones, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %allones, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %allones, i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_commute_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_commute_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, mu
; ZVFHMIN-NEXT:    vfmacc.vv v9, v8, v10, v0.t
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %allones, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %allones, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vbext, <vscale x 1 x float> %vaext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %allones, i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

define <vscale x 1 x float> @vfmacc_vf_nxv1f32_unmasked_tu(<vscale x 1 x half> %va, half %b, <vscale x 1 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, tu, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv1f32_unmasked_tu:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
; ZVFHMIN-NEXT:    vfmacc.vv v9, v10, v8
; ZVFHMIN-NEXT:    vmv1r.v v8, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %allones, i32 %evl)
  %vbext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x i1> %allones, i32 %evl)
  %v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %vaext, <vscale x 1 x float> %vbext, <vscale x 1 x float> %vc, <vscale x 1 x i1> %allones, i32 %evl)
  %u = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %allones, <vscale x 1 x float> %v, <vscale x 1 x float> %vc, i32 %evl)
  ret <vscale x 1 x float> %u
}

declare <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.fneg.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.select.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vfmacc_vv_nxv2f32(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vv_nxv2f32_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv1r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv2f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v11, v10
; ZVFHMIN-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %aext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %allones, i32 %evl)
  %bext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %allones, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %aext, <vscale x 2 x float> %bext, <vscale x 2 x float> %c, <vscale x 2 x i1> %allones, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vf_nxv2f32(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v9, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

define <vscale x 2 x float> @vfmacc_vf_nxv2f32_unmasked(<vscale x 2 x half> %va, half %b, <vscale x 2 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vmv1r.v v8, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv2f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v10, v9
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %allones, i32 %evl)
  %vbext = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half> %vb, <vscale x 2 x i1> %allones, i32 %evl)
  %v = call <vscale x 2 x float> @llvm.vp.fma.nxv2f32(<vscale x 2 x float> %vaext, <vscale x 2 x float> %vbext, <vscale x 2 x float> %vc, <vscale x 2 x i1> %allones, i32 %evl)
  ret <vscale x 2 x float> %v
}

declare <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.select.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vfmacc_vv_nxv4f32(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v14, v10, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v12
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vv_nxv4f32_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v10, v8, v9
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv4f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v14, v10
; ZVFHMIN-NEXT:    vmv.v.v v8, v12
; ZVFHMIN-NEXT:    ret
  %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %aext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %allones, i32 %evl)
  %bext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %allones, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %aext, <vscale x 4 x float> %bext, <vscale x 4 x float> %c, <vscale x 4 x i1> %allones, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vf_nxv4f32(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v10, fa0, v8, v0.t
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v14, v12
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vfmacc_vf_nxv4f32_unmasked(<vscale x 4 x half> %va, half %b, <vscale x 4 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v10, fa0, v8
; ZVFH-NEXT:    vmv2r.v v8, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv4f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v14, v12
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v14
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v12, v10
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %allones, i32 %evl)
  %vbext = call <vscale x 4 x float> @llvm.vp.fpext.nxv4f32.nxv4f16(<vscale x 4 x half> %vb, <vscale x 4 x i1> %allones, i32 %evl)
  %v = call <vscale x 4 x float> @llvm.vp.fma.nxv4f32(<vscale x 4 x float> %vaext, <vscale x 4 x float> %vbext, <vscale x 4 x float> %vc, <vscale x 4 x i1> %allones, i32 %evl)
  ret <vscale x 4 x float> %v
}

declare <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.fneg.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x float> @llvm.vp.merge.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.select.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
declare <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vfmacc_vv_nxv8f32(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v12, v8, v10, v0.t
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v20, v12, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v16
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vv_nxv8f32_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v12, v8, v10
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv8f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v20, v12
; ZVFHMIN-NEXT:    vmv.v.v v8, v16
; ZVFHMIN-NEXT:    ret
  %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
  %aext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %allones, i32 %evl)
  %bext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %allones, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %aext, <vscale x 8 x float> %bext, <vscale x 8 x float> %c, <vscale x 8 x i1> %allones, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vf_nxv8f32(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv8f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v12, fa0, v8, v0.t
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v20, v16
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
  %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vfmacc_vf_nxv8f32_unmasked(<vscale x 8 x half> %va, half %b, <vscale x 8 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv8f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v12, fa0, v8
; ZVFH-NEXT:    vmv4r.v v8, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv8f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v20, v16
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v16, v12
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
  %vaext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %allones, i32 %evl)
  %vbext = call <vscale x 8 x float> @llvm.vp.fpext.nxv8f32.nxv8f16(<vscale x 8 x half> %vb, <vscale x 8 x i1> %allones, i32 %evl)
  %v = call <vscale x 8 x float> @llvm.vp.fma.nxv8f32(<vscale x 8 x float> %vaext, <vscale x 8 x float> %vbext, <vscale x 8 x float> %vc, <vscale x 8 x i1> %allones, i32 %evl)
  ret <vscale x 8 x float> %v
}

declare <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32)
declare <vscale x 16 x float> @llvm.vp.merge.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
declare <vscale x 16 x float> @llvm.vp.select.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
declare <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half>, <vscale x 16 x i1>, i32)

define <vscale x 16 x float> @vfmacc_vv_nxv16f32(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv16f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v16, v8, v12, v0.t
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 3
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; ZVFHMIN-NEXT:    addi a1, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfmadd.vv v24, v16, v8, v0.t
; ZVFHMIN-NEXT:    vmv.v.v v8, v24
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    ret
  %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> %m, i32 %evl)
  %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %m, i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfmacc_vv_nxv16f32_unmasked(<vscale x 16 x half> %a, <vscale x 16 x half> %b, <vscale x 16 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vv_nxv16f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vv v16, v8, v12
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vv_nxv16f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v0, v16
; ZVFHMIN-NEXT:    vmv.v.v v8, v24
; ZVFHMIN-NEXT:    ret
  %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
  %aext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x i1> %allones, i32 %evl)
  %bext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %b, <vscale x 16 x i1> %allones, i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %aext, <vscale x 16 x float> %bext, <vscale x 16 x float> %c, <vscale x 16 x i1> %allones, i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfmacc_vf_nxv16f32(<vscale x 16 x half> %va, half %b, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv16f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v16, fa0, v8, v0.t
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v24, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v24
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8, v0.t
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v4, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16, v0.t
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
  %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x float> %v
}

define <vscale x 16 x float> @vfmacc_vf_nxv16f32_unmasked(<vscale x 16 x half> %va, half %b, <vscale x 16 x float> %vc, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_vf_nxv16f32_unmasked:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFH-NEXT:    vfwmacc.vf v16, fa0, v8
; ZVFH-NEXT:    vmv8r.v v8, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmacc_vf_nxv16f32_unmasked:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v24, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v0, v24
; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16
; ZVFHMIN-NEXT:    ret
  %elt.head = insertelement <vscale x 16 x half> poison, half %b, i32 0
  %vb = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %splat = insertelement <vscale x 16 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 16 x i1> %splat, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
  %vaext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %allones, i32 %evl)
  %vbext = call <vscale x 16 x float> @llvm.vp.fpext.nxv16f32.nxv16f16(<vscale x 16 x half> %vb, <vscale x 16 x i1> %allones, i32 %evl)
  %v = call <vscale x 16 x float> @llvm.vp.fma.nxv16f32(<vscale x 16 x float> %vaext, <vscale x 16 x float> %vbext, <vscale x 16 x float> %vc, <vscale x 16 x i1> %allones, i32 %evl)
  ret <vscale x 16 x float> %v
}

declare <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x double> @llvm.vp.fneg.nxv1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32)
declare <vscale x 1 x double> @llvm.vp.merge.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32)
declare <vscale x 1 x double> @llvm.vp.select.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32)
declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vfmacc_vv_nxv1f64(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> %m, i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vv_nxv1f64_unmasked(<vscale x 1 x float> %a, <vscale x 1 x float> %b, <vscale x 1 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv1r.v v8, v10
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x i1> %allones, i32 %evl)
  %bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %b, <vscale x 1 x i1> %allones, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %allones, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vf_nxv1f64(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v9, fa0, v8, v0.t
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %m, i32 %evl)
  %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vf_nxv1f64_unmasked(<vscale x 1 x float> %va, float %b, <vscale x 1 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv1f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v9, fa0, v8
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 1 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
  %vaext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x i1> %allones, i32 %evl)
  %vbext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f32(<vscale x 1 x float> %vb, <vscale x 1 x i1> %allones, i32 %evl)
  %v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %vaext, <vscale x 1 x double> %vbext, <vscale x 1 x double> %vc, <vscale x 1 x i1> %allones, i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x double> @llvm.vp.fneg.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 2 x double> @llvm.vp.select.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfmacc_vv_nxv2f64(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vv_nxv2f64_unmasked(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vv v10, v8, v9
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %allones, i32 %evl)
  %bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %b, <vscale x 2 x i1> %allones, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %allones, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vf_nxv2f64(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vf v10, fa0, v8, v0.t
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
  %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vf_nxv2f64_unmasked(<vscale x 2 x float> %va, float %b, <vscale x 2 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv2f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; CHECK-NEXT:    vfwmacc.vf v10, fa0, v8
; CHECK-NEXT:    vmv2r.v v8, v10
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 2 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
  %vaext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %allones, i32 %evl)
  %vbext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %vb, <vscale x 2 x i1> %allones, i32 %evl)
  %v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %vaext, <vscale x 2 x double> %vbext, <vscale x 2 x double> %vc, <vscale x 2 x i1> %allones, i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x double> @llvm.vp.fneg.nxv4f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32)
declare <vscale x 4 x double> @llvm.vp.merge.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32)
declare <vscale x 4 x double> @llvm.vp.select.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32)
declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vfmacc_vv_nxv4f64(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v8, v10, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %m, i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vv_nxv4f64_unmasked(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vv v12, v8, v10
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %allones, i32 %evl)
  %bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %b, <vscale x 4 x i1> %allones, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %allones, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vf_nxv4f64(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v12, fa0, v8, v0.t
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %m, i32 %evl)
  %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vf_nxv4f64_unmasked(<vscale x 4 x float> %va, float %b, <vscale x 4 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv4f64_unmasked:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfwmacc.vf v12, fa0, v8
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %elt.head = insertelement <vscale x 4 x float> poison, float %b, i32 0
  %vb = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
  %allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  %vaext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x i1> %allones, i32 %evl)
  %vbext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> %vb, <vscale x 4 x i1> %allones, i32 %evl)
  %v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %vaext, <vscale x 4 x double> %vbext, <vscale x 4 x double> %vc, <vscale x 4 x i1> %allones, i32 %evl)
  ret <vscale x 4 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x double> @llvm.vp.fneg.nxv8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32)
declare <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32)
declare <vscale x 8 x double> @llvm.vp.select.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32)
declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vfmacc_vv_nxv8f64(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfwmacc.vv v16, v8, v12, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> %m, i32 %evl)
%bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> %m, i32 %evl)
%v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vv_nxv8f64_unmasked(<vscale x 8 x float> %a, <vscale x 8 x float> %b, <vscale x 8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfwmacc.vv v16, v8, v12
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
%allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x i1> %allones, i32 %evl)
%bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %b, <vscale x 8 x i1> %allones, i32 %evl)
%v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %allones, i32 %evl)
ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vf_nxv8f64(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfwmacc.vf v16, fa0, v8, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
%vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
%vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %m, i32 %evl)
%vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl)
%v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x double> %vc, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vf_nxv8f64_unmasked(<vscale x 8 x float> %va, float %b, <vscale x 8 x double> %vc, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vf_nxv8f64_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
; CHECK-NEXT: vfwmacc.vf v16, fa0, v8
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%elt.head = insertelement <vscale x 8 x float> poison, float %b, i32 0
%vb = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
%splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
%allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%vaext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x i1> %allones, i32 %evl)
%vbext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f32(<vscale x 8 x float> %vb, <vscale x 8 x i1> %allones, i32 %evl)
%v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %vaext, <vscale x 8 x double> %vbext, <vscale x 8 x double> %vc, <vscale x 8 x i1> %allones, i32 %evl)
ret <vscale x 8 x double> %v
}
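
; The f16 -> f64 cases below need two widening steps; per the expected code,
; the f16 inputs are first extended to f32 and only the second step is folded
; into an f32 -> f64 vfwmacc.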
declare <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v11, v8, v0.t
; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwmacc.vv v10, v11, v8, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
%aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
%bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %m, i32 %evl)
%v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x double> %v
}

define <vscale x 1 x double> @vfmacc_vv_nxv1f64_nxv1f16_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv1f64_nxv1f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v11, v8
; CHECK-NEXT: vfwcvt.f.f.v v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vfwmacc.vv v10, v11, v8
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
%splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
%allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%aext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %allones, i32 %evl)
%bext = call <vscale x 1 x double> @llvm.vp.fpext.nxv1f64.nxv1f16(<vscale x 1 x half> %b, <vscale x 1 x i1> %allones, i32 %evl)
%v = call <vscale x 1 x double> @llvm.vp.fma.nxv1f64(<vscale x 1 x double> %aext, <vscale x 1 x double> %bext, <vscale x 1 x double> %c, <vscale x 1 x i1> %allones, i32 %evl)
ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8, v0.t
; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwmacc.vv v10, v12, v8, v0.t
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
%aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %m, i32 %evl)
%bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 %evl)
%v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x double> %v
}

define <vscale x 2 x double> @vfmacc_vv_nxv2f64_nxv2f16_unmasked(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv2f64_nxv2f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8
; CHECK-NEXT: vfwcvt.f.f.v v8, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; CHECK-NEXT: vfwmacc.vv v10, v12, v8
; CHECK-NEXT: vmv2r.v v8, v10
; CHECK-NEXT: ret
%splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
%allones = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
%aext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x i1> %allones, i32 %evl)
%bext = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f16(<vscale x 2 x half> %b, <vscale x 2 x i1> %allones, i32 %evl)
%v = call <vscale x 2 x double> @llvm.vp.fma.nxv2f64(<vscale x 2 x double> %aext, <vscale x 2 x double> %bext, <vscale x 2 x double> %c, <vscale x 2 x i1> %allones, i32 %evl)
ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v10, v8, v0.t
; CHECK-NEXT: vfwcvt.f.f.v v16, v9, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwmacc.vv v12, v10, v16, v0.t
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
%aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %m, i32 %evl)
%bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %m, i32 %evl)
%v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vfmacc_vv_nxv4f64_nxv4f16_unmasked(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv4f64_nxv4f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v10, v8
; CHECK-NEXT: vfwcvt.f.f.v v16, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfwmacc.vv v12, v10, v16
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
%splat = insertelement <vscale x 4 x i1> poison, i1 -1, i32 0
%allones = shufflevector <vscale x 4 x i1> %splat, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
%aext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x i1> %allones, i32 %evl)
%bext = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f16(<vscale x 4 x half> %b, <vscale x 4 x i1> %allones, i32 %evl)
%v = call <vscale x 4 x double> @llvm.vp.fma.nxv4f64(<vscale x 4 x double> %aext, <vscale x 4 x double> %bext, <vscale x 4 x double> %c, <vscale x 4 x i1> %allones, i32 %evl)
ret <vscale x 4 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8, v0.t
; CHECK-NEXT: vfwcvt.f.f.v v24, v10, v0.t
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfwmacc.vv v16, v12, v24, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %m, i32 %evl)
%bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %m, i32 %evl)
%v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vfmacc_vv_nxv8f64_nxv8f16_unmasked(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x double> %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16_unmasked:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; CHECK-NEXT: vfwcvt.f.f.v v12, v8
; CHECK-NEXT: vfwcvt.f.f.v v24, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT: vfwmacc.vv v16, v12, v24
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
%allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
%aext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %allones, i32 %evl)
%bext = call <vscale x 8 x double> @llvm.vp.fpext.nxv8f64.nxv8f16(<vscale x 8 x half> %b, <vscale x 8 x i1> %allones, i32 %evl)
%v = call <vscale x 8 x double> @llvm.vp.fma.nxv8f64(<vscale x 8 x double> %aext, <vscale x 8 x double> %bext, <vscale x 8 x double> %c, <vscale x 8 x i1> %allones, i32 %evl)
ret <vscale x 8 x double> %v
}
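
; Make sure the widening FMA is still formed when both multiplicands of the FMA
; are the same extended value.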
define <vscale x 1 x float> @vfmacc_squared_nxv1f32(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_squared_nxv1f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v8, v0.t
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8, v0.t
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v9, v9, v10, v0.t
; ZVFHMIN-NEXT: vmv1r.v v8, v9
; ZVFHMIN-NEXT: ret
%aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %m, i32 %evl)
%v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x float> %v
}

define <vscale x 1 x float> @vfmacc_squared_nxv1f32_unmasked(<vscale x 1 x half> %a, <vscale x 1 x half> %b, <vscale x 1 x float> %c, i32 zeroext %evl) {
; ZVFH-LABEL: vfmacc_squared_nxv1f32_unmasked:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFH-NEXT: vfwmacc.vv v10, v8, v8
; ZVFH-NEXT: vmv1r.v v8, v10
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmacc_squared_nxv1f32_unmasked:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v9, v9, v10
; ZVFHMIN-NEXT: vmv1r.v v8, v9
; ZVFHMIN-NEXT: ret
%splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
%allones = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
%aext = call <vscale x 1 x float> @llvm.vp.fpext.nxv1f32.nxv1f16(<vscale x 1 x half> %a, <vscale x 1 x i1> %allones, i32 %evl)
%v = call <vscale x 1 x float> @llvm.vp.fma.nxv1f32(<vscale x 1 x float> %aext, <vscale x 1 x float> %aext, <vscale x 1 x float> %c, <vscale x 1 x i1> %allones, i32 %evl)
ret <vscale x 1 x float> %v
}