1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s
4 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s
7 declare <2 x half> @llvm.vp.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, <2 x i1>, i32)
8 declare <2 x half> @llvm.vp.fneg.v2f16(<2 x half>, <2 x i1>, i32)
9 declare <2 x half> @llvm.vp.merge.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32)
10 declare <2 x half> @llvm.vp.select.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32)
; Vector-vector, <2 x half>: vp.fma(-%a, %b, -%c) is computed under an all-ones
; mask and then blended with %c by vp.merge under %m. The assertions expect a
; single masked vfnmacc.vv (v0.t) under a `tu, mu` vsetvli at e16/mf4.
12 define <2 x half> @vfnmacc_vv_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
13 ; CHECK-LABEL: vfnmacc_vv_v2f16:
15 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
16 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
17 ; CHECK-NEXT: vmv1r.v v8, v10
19 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
20 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
21 %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl)
22 %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
23 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
24 %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
; Vector-vector, <2 x half>, unmasked: the final vp.merge uses the all-ones
; mask, so the %m parameter is not referenced by the visible body. The
; assertions expect an unmasked vfnmacc.vv under a `tu, ma` vsetvli.
28 define <2 x half> @vfnmacc_vv_v2f16_unmasked(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
29 ; CHECK-LABEL: vfnmacc_vv_v2f16_unmasked:
31 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
32 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9
33 ; CHECK-NEXT: vmv1r.v v8, v10
35 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
36 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
37 %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl)
38 %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
39 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
40 %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl)
; Vector-scalar, <2 x half>: scalar %b is splatted (insertelement +
; shufflevector) and used as the second vp.fma operand; the result is blended
; with %c by vp.merge under %m. The assertions expect a masked vfnmacc.vf that
; reads the scalar from fa0, under a `tu, mu` vsetvli.
44 define <2 x half> @vfnmacc_vf_v2f16(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
45 ; CHECK-LABEL: vfnmacc_vf_v2f16:
47 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
48 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
49 ; CHECK-NEXT: vmv1r.v v8, v9
51 %elt.head = insertelement <2 x half> poison, half %b, i32 0
52 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
53 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
54 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
55 %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl)
56 %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
57 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
58 %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
; Vector-scalar, <2 x half>, commuted multiplicands: the splat %vb is the first
; vp.fma operand and %nega the second. The assertions expect the same masked
; vfnmacc.vf (scalar in fa0, `tu, mu`) as for the non-commuted operand order.
62 define <2 x half> @vfnmacc_vf_v2f16_commute(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
63 ; CHECK-LABEL: vfnmacc_vf_v2f16_commute:
65 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
66 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
67 ; CHECK-NEXT: vmv1r.v v8, v9
69 %elt.head = insertelement <2 x half> poison, half %b, i32 0
70 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
71 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
72 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
73 %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl)
74 %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
75 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
76 %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
; Vector-scalar, <2 x half>, unmasked: the function takes no mask parameter and
; the final vp.merge uses the all-ones mask. The assertions expect an unmasked
; vfnmacc.vf (scalar in fa0) under a `tu, ma` vsetvli.
80 define <2 x half> @vfnmacc_vf_v2f16_unmasked(<2 x half> %a, half %b, <2 x half> %c, i32 zeroext %evl) {
81 ; CHECK-LABEL: vfnmacc_vf_v2f16_unmasked:
83 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
84 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8
85 ; CHECK-NEXT: vmv1r.v v8, v9
87 %elt.head = insertelement <2 x half> poison, half %b, i32 0
88 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
89 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
90 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
91 %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl)
92 %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
93 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
94 %u = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %allones, <2 x half> %v, <2 x half> %c, i32 %evl)
; Vector-vector, <2 x half>, vp.select variant: the fma result is blended with
; %c by vp.select (not vp.merge) under %m. The assertions expect a masked
; vfnmacc.vv with `ta, mu` in the vsetvli.
98 define <2 x half> @vfnmacc_vv_v2f16_ta(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
99 ; CHECK-LABEL: vfnmacc_vv_v2f16_ta:
101 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
102 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
103 ; CHECK-NEXT: vmv1r.v v8, v10
105 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
106 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
107 %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl)
108 %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
109 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %b, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
110 %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
; Vector-scalar, <2 x half>, vp.select variant: splatted %b is the second
; vp.fma operand and the result is blended with %c by vp.select under %m. The
; assertions expect a masked vfnmacc.vf (scalar in fa0) with `ta, mu`.
114 define <2 x half> @vfnmacc_vf_v2f16_ta(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
115 ; CHECK-LABEL: vfnmacc_vf_v2f16_ta:
117 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
118 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
119 ; CHECK-NEXT: vmv1r.v v8, v9
121 %elt.head = insertelement <2 x half> poison, half %b, i32 0
122 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
123 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
124 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
125 %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl)
126 %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
127 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %nega, <2 x half> %vb, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
128 %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
; Vector-scalar, <2 x half>, commuted multiplicands with vp.select: the splat
; %vb is the first vp.fma operand and %nega the second. The assertions expect
; a masked vfnmacc.vf (scalar in fa0) with `ta, mu`.
132 define <2 x half> @vfnmacc_vf_v2f16_commute_ta(<2 x half> %a, half %b, <2 x half> %c, <2 x i1> %m, i32 zeroext %evl) {
133 ; CHECK-LABEL: vfnmacc_vf_v2f16_commute_ta:
135 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu
136 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
137 ; CHECK-NEXT: vmv1r.v v8, v9
139 %elt.head = insertelement <2 x half> poison, half %b, i32 0
140 %vb = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
141 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
142 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
143 %nega = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %a, <2 x i1> %allones, i32 %evl)
144 %negc = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %c, <2 x i1> %allones, i32 %evl)
145 %v = call <2 x half> @llvm.vp.fma.v2f16(<2 x half> %vb, <2 x half> %nega, <2 x half> %negc, <2 x i1> %allones, i32 %evl)
146 %u = call <2 x half> @llvm.vp.select.v2f16(<2 x i1> %m, <2 x half> %v, <2 x half> %c, i32 %evl)
150 declare <4 x half> @llvm.vp.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x i1>, i32)
151 declare <4 x half> @llvm.vp.fneg.v4f16(<4 x half>, <4 x i1>, i32)
152 declare <4 x half> @llvm.vp.merge.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32)
153 declare <4 x half> @llvm.vp.select.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32)
; Vector-vector, <4 x half>: vp.fma(-%a, %b, -%c) is computed under an all-ones
; mask and then blended with %c by vp.merge under %m. The assertions expect a
; single masked vfnmacc.vv (v0.t) under a `tu, mu` vsetvli at e16/mf2.
155 define <4 x half> @vfnmacc_vv_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
156 ; CHECK-LABEL: vfnmacc_vv_v4f16:
158 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
159 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
160 ; CHECK-NEXT: vmv1r.v v8, v10
162 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
163 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
164 %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
165 %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
166 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
167 %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
; Vector-vector, <4 x half>, unmasked: the final vp.merge uses the all-ones
; mask, so the %m parameter is not referenced by the visible body. The
; assertions expect an unmasked vfnmacc.vv under a `tu, ma` vsetvli.
171 define <4 x half> @vfnmacc_vv_v4f16_unmasked(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
172 ; CHECK-LABEL: vfnmacc_vv_v4f16_unmasked:
174 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
175 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9
176 ; CHECK-NEXT: vmv1r.v v8, v10
178 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
179 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
180 %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
181 %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
182 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
183 %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl)
; Vector-scalar, <4 x half>: scalar %b is splatted (insertelement +
; shufflevector) and used as the second vp.fma operand; the result is blended
; with %c by vp.merge under %m. The assertions expect a masked vfnmacc.vf that
; reads the scalar from fa0, under a `tu, mu` vsetvli.
187 define <4 x half> @vfnmacc_vf_v4f16(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
188 ; CHECK-LABEL: vfnmacc_vf_v4f16:
190 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
191 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
192 ; CHECK-NEXT: vmv1r.v v8, v9
194 %elt.head = insertelement <4 x half> poison, half %b, i32 0
195 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
196 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
197 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
198 %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
199 %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
200 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
201 %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
; Vector-scalar, <4 x half>, commuted multiplicands: the splat %vb is the first
; vp.fma operand and %nega the second. The assertions expect the same masked
; vfnmacc.vf (scalar in fa0, `tu, mu`) as for the non-commuted operand order.
205 define <4 x half> @vfnmacc_vf_v4f16_commute(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
206 ; CHECK-LABEL: vfnmacc_vf_v4f16_commute:
208 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
209 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
210 ; CHECK-NEXT: vmv1r.v v8, v9
212 %elt.head = insertelement <4 x half> poison, half %b, i32 0
213 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
214 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
215 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
216 %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
217 %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
218 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
219 %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
; Vector-scalar, <4 x half>, unmasked: the function takes no mask parameter and
; the final vp.merge uses the all-ones mask. The assertions expect an unmasked
; vfnmacc.vf (scalar in fa0) under a `tu, ma` vsetvli.
223 define <4 x half> @vfnmacc_vf_v4f16_unmasked(<4 x half> %a, half %b, <4 x half> %c, i32 zeroext %evl) {
224 ; CHECK-LABEL: vfnmacc_vf_v4f16_unmasked:
226 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
227 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8
228 ; CHECK-NEXT: vmv1r.v v8, v9
230 %elt.head = insertelement <4 x half> poison, half %b, i32 0
231 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
232 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
233 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
234 %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
235 %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
236 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
237 %u = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %allones, <4 x half> %v, <4 x half> %c, i32 %evl)
; Vector-vector, <4 x half>, vp.select variant: the fma result is blended with
; %c by vp.select (not vp.merge) under %m. The assertions expect a masked
; vfnmacc.vv with `ta, mu` in the vsetvli.
241 define <4 x half> @vfnmacc_vv_v4f16_ta(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
242 ; CHECK-LABEL: vfnmacc_vv_v4f16_ta:
244 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
245 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
246 ; CHECK-NEXT: vmv1r.v v8, v10
248 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
249 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
250 %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
251 %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
252 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %b, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
253 %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
; Vector-scalar, <4 x half>, vp.select variant: splatted %b is the second
; vp.fma operand and the result is blended with %c by vp.select under %m. The
; assertions expect a masked vfnmacc.vf (scalar in fa0) with `ta, mu`.
257 define <4 x half> @vfnmacc_vf_v4f16_ta(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
258 ; CHECK-LABEL: vfnmacc_vf_v4f16_ta:
260 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
261 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
262 ; CHECK-NEXT: vmv1r.v v8, v9
264 %elt.head = insertelement <4 x half> poison, half %b, i32 0
265 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
266 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
267 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
268 %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
269 %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
270 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %nega, <4 x half> %vb, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
271 %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
; Vector-scalar, <4 x half>, commuted multiplicands with vp.select: the splat
; %vb is the first vp.fma operand and %nega the second. The assertions expect
; a masked vfnmacc.vf (scalar in fa0) with `ta, mu`.
275 define <4 x half> @vfnmacc_vf_v4f16_commute_ta(<4 x half> %a, half %b, <4 x half> %c, <4 x i1> %m, i32 zeroext %evl) {
276 ; CHECK-LABEL: vfnmacc_vf_v4f16_commute_ta:
278 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu
279 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
280 ; CHECK-NEXT: vmv1r.v v8, v9
282 %elt.head = insertelement <4 x half> poison, half %b, i32 0
283 %vb = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
284 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
285 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
286 %nega = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %a, <4 x i1> %allones, i32 %evl)
287 %negc = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %c, <4 x i1> %allones, i32 %evl)
288 %v = call <4 x half> @llvm.vp.fma.v4f16(<4 x half> %vb, <4 x half> %nega, <4 x half> %negc, <4 x i1> %allones, i32 %evl)
289 %u = call <4 x half> @llvm.vp.select.v4f16(<4 x i1> %m, <4 x half> %v, <4 x half> %c, i32 %evl)
293 declare <8 x half> @llvm.vp.fma.v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x i1>, i32)
294 declare <8 x half> @llvm.vp.fneg.v8f16(<8 x half>, <8 x i1>, i32)
295 declare <8 x half> @llvm.vp.merge.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32)
296 declare <8 x half> @llvm.vp.select.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32)
; Vector-vector, <8 x half>: vp.fma(-%a, %b, -%c) is computed under an all-ones
; mask and then blended with %c by vp.merge under %m. The assertions expect a
; single masked vfnmacc.vv (v0.t) under a `tu, mu` vsetvli at e16/m1.
298 define <8 x half> @vfnmacc_vv_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
299 ; CHECK-LABEL: vfnmacc_vv_v8f16:
301 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
302 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
303 ; CHECK-NEXT: vmv1r.v v8, v10
305 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
306 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
307 %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
308 %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
309 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
310 %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
; Vector-vector, <8 x half>, unmasked: the final vp.merge uses the all-ones
; mask, so the %m parameter is not referenced by the visible body. The
; assertions expect an unmasked vfnmacc.vv under a `tu, ma` vsetvli.
314 define <8 x half> @vfnmacc_vv_v8f16_unmasked(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
315 ; CHECK-LABEL: vfnmacc_vv_v8f16_unmasked:
317 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
318 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9
319 ; CHECK-NEXT: vmv1r.v v8, v10
321 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
322 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
323 %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
324 %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
325 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
326 %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl)
; Vector-scalar, <8 x half>: scalar %b is splatted (insertelement +
; shufflevector) and used as the second vp.fma operand; the result is blended
; with %c by vp.merge under %m. The assertions expect a masked vfnmacc.vf that
; reads the scalar from fa0, under a `tu, mu` vsetvli.
330 define <8 x half> @vfnmacc_vf_v8f16(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
331 ; CHECK-LABEL: vfnmacc_vf_v8f16:
333 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
334 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
335 ; CHECK-NEXT: vmv1r.v v8, v9
337 %elt.head = insertelement <8 x half> poison, half %b, i32 0
338 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
339 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
340 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
341 %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
342 %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
343 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
344 %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
; Vector-scalar, <8 x half>, commuted multiplicands: the splat %vb is the first
; vp.fma operand and %nega the second. The assertions expect the same masked
; vfnmacc.vf (scalar in fa0, `tu, mu`) as for the non-commuted operand order.
348 define <8 x half> @vfnmacc_vf_v8f16_commute(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
349 ; CHECK-LABEL: vfnmacc_vf_v8f16_commute:
351 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
352 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
353 ; CHECK-NEXT: vmv1r.v v8, v9
355 %elt.head = insertelement <8 x half> poison, half %b, i32 0
356 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
357 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
358 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
359 %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
360 %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
361 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
362 %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
; Vector-scalar, <8 x half>, unmasked: the function takes no mask parameter and
; the final vp.merge uses the all-ones mask. The assertions expect an unmasked
; vfnmacc.vf (scalar in fa0) under a `tu, ma` vsetvli.
366 define <8 x half> @vfnmacc_vf_v8f16_unmasked(<8 x half> %a, half %b, <8 x half> %c, i32 zeroext %evl) {
367 ; CHECK-LABEL: vfnmacc_vf_v8f16_unmasked:
369 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
370 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8
371 ; CHECK-NEXT: vmv1r.v v8, v9
373 %elt.head = insertelement <8 x half> poison, half %b, i32 0
374 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
375 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
376 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
377 %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
378 %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
379 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
380 %u = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %allones, <8 x half> %v, <8 x half> %c, i32 %evl)
; Vector-vector, <8 x half>, vp.select variant: the fma result is blended with
; %c by vp.select (not vp.merge) under %m. The assertions expect a masked
; vfnmacc.vv with `ta, mu`, followed by a vmv.v.v copy of the result into v8.
384 define <8 x half> @vfnmacc_vv_v8f16_ta(<8 x half> %a, <8 x half> %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
385 ; CHECK-LABEL: vfnmacc_vv_v8f16_ta:
387 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
388 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
389 ; CHECK-NEXT: vmv.v.v v8, v10
391 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
392 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
393 %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
394 %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
395 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %b, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
396 %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
; Vector-scalar, <8 x half>, vp.select variant: splatted %b is the second
; vp.fma operand and the result is blended with %c by vp.select under %m. The
; assertions expect a masked vfnmacc.vf (scalar in fa0) with `ta, mu`, followed
; by a vmv.v.v copy of the result into v8.
400 define <8 x half> @vfnmacc_vf_v8f16_ta(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
401 ; CHECK-LABEL: vfnmacc_vf_v8f16_ta:
403 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
404 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
405 ; CHECK-NEXT: vmv.v.v v8, v9
407 %elt.head = insertelement <8 x half> poison, half %b, i32 0
408 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
409 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
410 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
411 %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
412 %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
413 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %nega, <8 x half> %vb, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
414 %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
; Vector-scalar, <8 x half>, commuted multiplicands with vp.select: the splat
; %vb is the first vp.fma operand and %nega the second. The assertions expect
; a masked vfnmacc.vf (scalar in fa0) with `ta, mu`, followed by a vmv.v.v copy
; of the result into v8.
418 define <8 x half> @vfnmacc_vf_v8f16_commute_ta(<8 x half> %a, half %b, <8 x half> %c, <8 x i1> %m, i32 zeroext %evl) {
419 ; CHECK-LABEL: vfnmacc_vf_v8f16_commute_ta:
421 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu
422 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
423 ; CHECK-NEXT: vmv.v.v v8, v9
425 %elt.head = insertelement <8 x half> poison, half %b, i32 0
426 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
427 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
428 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
429 %nega = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %a, <8 x i1> %allones, i32 %evl)
430 %negc = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %c, <8 x i1> %allones, i32 %evl)
431 %v = call <8 x half> @llvm.vp.fma.v8f16(<8 x half> %vb, <8 x half> %nega, <8 x half> %negc, <8 x i1> %allones, i32 %evl)
432 %u = call <8 x half> @llvm.vp.select.v8f16(<8 x i1> %m, <8 x half> %v, <8 x half> %c, i32 %evl)
436 declare <16 x half> @llvm.vp.fma.v16f16(<16 x half>, <16 x half>, <16 x half>, <16 x i1>, i32)
437 declare <16 x half> @llvm.vp.fneg.v16f16(<16 x half>, <16 x i1>, i32)
438 declare <16 x half> @llvm.vp.merge.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32)
439 declare <16 x half> @llvm.vp.select.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32)
; Vector-vector, <16 x half>: vp.fma(-%a, %b, -%c) is computed under an
; all-ones mask and then blended with %c by vp.merge under %m. The assertions
; expect a single masked vfnmacc.vv (v0.t) under a `tu, mu` vsetvli at e16/m2.
441 define <16 x half> @vfnmacc_vv_v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
442 ; CHECK-LABEL: vfnmacc_vv_v16f16:
444 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu
445 ; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t
446 ; CHECK-NEXT: vmv2r.v v8, v12
448 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
449 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
450 %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
451 %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
452 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
453 %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
; Vector-vector, <16 x half>, unmasked: the final vp.merge uses the all-ones
; mask, so the %m parameter is not referenced by the visible body. The
; assertions expect an unmasked vfnmacc.vv under a `tu, ma` vsetvli.
457 define <16 x half> @vfnmacc_vv_v16f16_unmasked(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
458 ; CHECK-LABEL: vfnmacc_vv_v16f16_unmasked:
460 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
461 ; CHECK-NEXT: vfnmacc.vv v12, v8, v10
462 ; CHECK-NEXT: vmv2r.v v8, v12
464 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
465 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
466 %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
467 %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
468 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
469 %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl)
; Vector-scalar, <16 x half>: scalar %b is splatted (insertelement +
; shufflevector) and used as the second vp.fma operand; the result is blended
; with %c by vp.merge under %m. The assertions expect a masked vfnmacc.vf that
; reads the scalar from fa0, under a `tu, mu` vsetvli.
473 define <16 x half> @vfnmacc_vf_v16f16(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
474 ; CHECK-LABEL: vfnmacc_vf_v16f16:
476 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu
477 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
478 ; CHECK-NEXT: vmv2r.v v8, v10
480 %elt.head = insertelement <16 x half> poison, half %b, i32 0
481 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
482 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
483 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
484 %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
485 %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
486 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
487 %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
; Vector-scalar, <16 x half>, commuted multiplicands: the splat %vb is the
; first vp.fma operand and %nega the second. The assertions expect the same
; masked vfnmacc.vf (scalar in fa0, `tu, mu`) as for the non-commuted order.
491 define <16 x half> @vfnmacc_vf_v16f16_commute(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
492 ; CHECK-LABEL: vfnmacc_vf_v16f16_commute:
494 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu
495 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
496 ; CHECK-NEXT: vmv2r.v v8, v10
498 %elt.head = insertelement <16 x half> poison, half %b, i32 0
499 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
500 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
501 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
502 %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
503 %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
504 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
505 %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
; Vector-scalar, <16 x half>, unmasked: the function takes no mask parameter
; and the final vp.merge uses the all-ones mask. The assertions expect an
; unmasked vfnmacc.vf (scalar in fa0) under a `tu, ma` vsetvli.
509 define <16 x half> @vfnmacc_vf_v16f16_unmasked(<16 x half> %a, half %b, <16 x half> %c, i32 zeroext %evl) {
510 ; CHECK-LABEL: vfnmacc_vf_v16f16_unmasked:
512 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma
513 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8
514 ; CHECK-NEXT: vmv2r.v v8, v10
516 %elt.head = insertelement <16 x half> poison, half %b, i32 0
517 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
518 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
519 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
520 %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
521 %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
522 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
523 %u = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %allones, <16 x half> %v, <16 x half> %c, i32 %evl)
; Vector-vector, <16 x half>, vp.select variant: the fma result is blended with
; %c by vp.select (not vp.merge) under %m. The assertions expect a masked
; vfnmacc.vv with `ta, mu`, followed by a vmv.v.v copy of the result into v8.
527 define <16 x half> @vfnmacc_vv_v16f16_ta(<16 x half> %a, <16 x half> %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
528 ; CHECK-LABEL: vfnmacc_vv_v16f16_ta:
530 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
531 ; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t
532 ; CHECK-NEXT: vmv.v.v v8, v12
534 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
535 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
536 %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
537 %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
538 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %b, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
539 %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
; Vector-scalar, <16 x half>, vp.select variant: splatted %b is the second
; vp.fma operand and the result is blended with %c by vp.select under %m. The
; assertions expect a masked vfnmacc.vf (scalar in fa0) with `ta, mu`, followed
; by a vmv.v.v copy of the result into v8.
543 define <16 x half> @vfnmacc_vf_v16f16_ta(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
544 ; CHECK-LABEL: vfnmacc_vf_v16f16_ta:
546 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
547 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
548 ; CHECK-NEXT: vmv.v.v v8, v10
550 %elt.head = insertelement <16 x half> poison, half %b, i32 0
551 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
552 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
553 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
554 %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
555 %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
556 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %nega, <16 x half> %vb, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
557 %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
; Vector-scalar, <16 x half>, commuted multiplicands with vp.select: the splat
; %vb is the first vp.fma operand and %nega the second. The assertions expect
; a masked vfnmacc.vf (scalar in fa0) with `ta, mu`, followed by a vmv.v.v copy
; of the result into v8.
561 define <16 x half> @vfnmacc_vf_v16f16_commute_ta(<16 x half> %a, half %b, <16 x half> %c, <16 x i1> %m, i32 zeroext %evl) {
562 ; CHECK-LABEL: vfnmacc_vf_v16f16_commute_ta:
564 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu
565 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
566 ; CHECK-NEXT: vmv.v.v v8, v10
568 %elt.head = insertelement <16 x half> poison, half %b, i32 0
569 %vb = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
570 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
571 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
572 %nega = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %a, <16 x i1> %allones, i32 %evl)
573 %negc = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %c, <16 x i1> %allones, i32 %evl)
574 %v = call <16 x half> @llvm.vp.fma.v16f16(<16 x half> %vb, <16 x half> %nega, <16 x half> %negc, <16 x i1> %allones, i32 %evl)
575 %u = call <16 x half> @llvm.vp.select.v16f16(<16 x i1> %m, <16 x half> %v, <16 x half> %c, i32 %evl)
; Vector-predicated fma, fneg, merge and select intrinsics for <32 x half>,
; used by the v32f16 test functions that follow.
579 declare <32 x half> @llvm.vp.fma.v32f16(<32 x half>, <32 x half>, <32 x half>, <32 x i1>, i32)
580 declare <32 x half> @llvm.vp.fneg.v32f16(<32 x half>, <32 x i1>, i32)
581 declare <32 x half> @llvm.vp.merge.v32f16(<32 x i1>, <32 x half>, <32 x half>, i32)
582 declare <32 x half> @llvm.vp.select.v32f16(<32 x i1>, <32 x half>, <32 x half>, i32)
; Vector-vector, vp.merge variant for <32 x half>: vp.fma(-%a, %b, -%c) is
; computed under an all-ones mask and merged with %c under %m. The expected
; output is one masked vfnmacc.vv under a tail-undisturbed
; (e16, m4, tu, mu) vsetvli.
584 define <32 x half> @vfnmacc_vv_v32f16(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
585 ; CHECK-LABEL: vfnmacc_vv_v32f16:
587 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu
588 ; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t
589 ; CHECK-NEXT: vmv4r.v v8, v16
591 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
592 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
593 %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
594 %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
595 %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
596 %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
; Vector-vector variant for <32 x half> where vp.merge itself uses the
; all-ones mask; %m is a parameter but is not referenced by the instructions
; shown. The expected output is an unmasked vfnmacc.vv (no v0.t) under an
; (e16, m4, tu, ma) vsetvli.
600 define <32 x half> @vfnmacc_vv_v32f16_unmasked(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
601 ; CHECK-LABEL: vfnmacc_vv_v32f16_unmasked:
603 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
604 ; CHECK-NEXT: vfnmacc.vv v16, v8, v12
605 ; CHECK-NEXT: vmv4r.v v8, v16
607 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
608 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
609 %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
610 %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
611 %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
612 %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl)
; Scalar-splat, vp.merge variant for <32 x half>: %b is splatted into %vb and
; used as the second multiplicand of vp.fma(-%a, %vb, -%c); the result is
; merged with %c under %m. The expected output is one masked vfnmacc.vf
; taking fa0 under an (e16, m4, tu, mu) vsetvli.
616 define <32 x half> @vfnmacc_vf_v32f16(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
617 ; CHECK-LABEL: vfnmacc_vf_v32f16:
619 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu
620 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
621 ; CHECK-NEXT: vmv4r.v v8, v12
623 %elt.head = insertelement <32 x half> poison, half %b, i32 0
624 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
625 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
626 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
627 %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
628 %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
629 %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
630 %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
; Scalar-splat, vp.merge variant for <32 x half> with the multiplicands
; commuted: vp.fma(%vb, -%a, -%c) under an all-ones mask, merged with %c
; under %m. The expected output is still one masked vfnmacc.vf taking fa0
; under an (e16, m4, tu, mu) vsetvli.
634 define <32 x half> @vfnmacc_vf_v32f16_commute(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
635 ; CHECK-LABEL: vfnmacc_vf_v32f16_commute:
637 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu
638 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
639 ; CHECK-NEXT: vmv4r.v v8, v12
641 %elt.head = insertelement <32 x half> poison, half %b, i32 0
642 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
643 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
644 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
645 %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
646 %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
647 %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
648 %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
; Scalar-splat variant for <32 x half> with no mask parameter: vp.merge uses
; the all-ones mask to combine vp.fma(-%a, %vb, -%c) with %c. The expected
; output is an unmasked vfnmacc.vf (no v0.t) under an (e16, m4, tu, ma)
; vsetvli.
652 define <32 x half> @vfnmacc_vf_v32f16_unmasked(<32 x half> %a, half %b, <32 x half> %c, i32 zeroext %evl) {
653 ; CHECK-LABEL: vfnmacc_vf_v32f16_unmasked:
655 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma
656 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8
657 ; CHECK-NEXT: vmv4r.v v8, v12
659 %elt.head = insertelement <32 x half> poison, half %b, i32 0
660 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
661 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
662 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
663 %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
664 %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
665 %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
666 %u = call <32 x half> @llvm.vp.merge.v32f16(<32 x i1> %allones, <32 x half> %v, <32 x half> %c, i32 %evl)
; Vector-vector, vp.select variant for <32 x half>: vp.fma(-%a, %b, -%c)
; under an all-ones mask, then vp.select with %c using %m. The expected
; output is one masked vfnmacc.vv under a tail-agnostic (e16, m4, ta, mu)
; vsetvli.
670 define <32 x half> @vfnmacc_vv_v32f16_ta(<32 x half> %a, <32 x half> %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
671 ; CHECK-LABEL: vfnmacc_vv_v32f16_ta:
673 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
674 ; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t
675 ; CHECK-NEXT: vmv.v.v v8, v16
677 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
678 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
679 %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
680 %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
681 %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %b, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
682 %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
; Scalar-splat, vp.select variant for <32 x half>: %b is splatted into %vb,
; vp.fma(-%a, %vb, -%c) runs under an all-ones mask, and vp.select chooses
; between that result and %c using %m. The expected output is one masked
; vfnmacc.vf taking fa0 under an (e16, m4, ta, mu) vsetvli.
686 define <32 x half> @vfnmacc_vf_v32f16_ta(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
687 ; CHECK-LABEL: vfnmacc_vf_v32f16_ta:
689 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
690 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
691 ; CHECK-NEXT: vmv.v.v v8, v12
693 %elt.head = insertelement <32 x half> poison, half %b, i32 0
694 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
695 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
696 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
697 %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
698 %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
699 %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %nega, <32 x half> %vb, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
700 %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
; Scalar-splat, vp.select variant for <32 x half> with the multiplicands
; commuted: vp.fma(%vb, -%a, -%c) under an all-ones mask, then vp.select with
; %c using %m. The expected output is still one masked vfnmacc.vf taking fa0
; under an (e16, m4, ta, mu) vsetvli.
704 define <32 x half> @vfnmacc_vf_v32f16_commute_ta(<32 x half> %a, half %b, <32 x half> %c, <32 x i1> %m, i32 zeroext %evl) {
705 ; CHECK-LABEL: vfnmacc_vf_v32f16_commute_ta:
707 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu
708 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
709 ; CHECK-NEXT: vmv.v.v v8, v12
711 %elt.head = insertelement <32 x half> poison, half %b, i32 0
712 %vb = shufflevector <32 x half> %elt.head, <32 x half> poison, <32 x i32> zeroinitializer
713 %splat = insertelement <32 x i1> poison, i1 -1, i32 0
714 %allones = shufflevector <32 x i1> %splat, <32 x i1> poison, <32 x i32> zeroinitializer
715 %nega = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %a, <32 x i1> %allones, i32 %evl)
716 %negc = call <32 x half> @llvm.vp.fneg.v32f16(<32 x half> %c, <32 x i1> %allones, i32 %evl)
717 %v = call <32 x half> @llvm.vp.fma.v32f16(<32 x half> %vb, <32 x half> %nega, <32 x half> %negc, <32 x i1> %allones, i32 %evl)
718 %u = call <32 x half> @llvm.vp.select.v32f16(<32 x i1> %m, <32 x half> %v, <32 x half> %c, i32 %evl)
; Vector-predicated fma, fneg, merge and select intrinsics for <2 x float>,
; used by the v2f32 test functions that follow.
722 declare <2 x float> @llvm.vp.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, <2 x i1>, i32)
723 declare <2 x float> @llvm.vp.fneg.v2f32(<2 x float>, <2 x i1>, i32)
724 declare <2 x float> @llvm.vp.merge.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32)
725 declare <2 x float> @llvm.vp.select.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32)
; Vector-vector, vp.merge variant for <2 x float>: vp.fma(-%a, %b, -%c) is
; computed under an all-ones mask and merged with %c under %m. The expected
; output is one masked vfnmacc.vv under a tail-undisturbed
; (e32, mf2, tu, mu) vsetvli.
727 define <2 x float> @vfnmacc_vv_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
728 ; CHECK-LABEL: vfnmacc_vv_v2f32:
730 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu
731 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
732 ; CHECK-NEXT: vmv1r.v v8, v10
734 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
735 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
736 %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
737 %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
738 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
739 %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
; Vector-vector variant for <2 x float> where vp.merge itself uses the
; all-ones mask; %m is a parameter but is not referenced by the instructions
; shown. The expected output is an unmasked vfnmacc.vv (no v0.t) under an
; (e32, mf2, tu, ma) vsetvli.
743 define <2 x float> @vfnmacc_vv_v2f32_unmasked(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
744 ; CHECK-LABEL: vfnmacc_vv_v2f32_unmasked:
746 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
747 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9
748 ; CHECK-NEXT: vmv1r.v v8, v10
750 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
751 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
752 %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
753 %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
754 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
755 %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl)
; Scalar-splat, vp.merge variant for <2 x float>: %b is splatted into %vb and
; used as the second multiplicand of vp.fma(-%a, %vb, -%c); the result is
; merged with %c under %m. The expected output is one masked vfnmacc.vf
; taking fa0 under an (e32, mf2, tu, mu) vsetvli.
759 define <2 x float> @vfnmacc_vf_v2f32(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
760 ; CHECK-LABEL: vfnmacc_vf_v2f32:
762 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu
763 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
764 ; CHECK-NEXT: vmv1r.v v8, v9
766 %elt.head = insertelement <2 x float> poison, float %b, i32 0
767 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
768 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
769 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
770 %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
771 %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
772 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
773 %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
; Scalar-splat, vp.merge variant for <2 x float> with the multiplicands
; commuted: vp.fma(%vb, -%a, -%c) under an all-ones mask, merged with %c
; under %m. The expected output is still one masked vfnmacc.vf taking fa0
; under an (e32, mf2, tu, mu) vsetvli.
777 define <2 x float> @vfnmacc_vf_v2f32_commute(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
778 ; CHECK-LABEL: vfnmacc_vf_v2f32_commute:
780 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, mu
781 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
782 ; CHECK-NEXT: vmv1r.v v8, v9
784 %elt.head = insertelement <2 x float> poison, float %b, i32 0
785 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
786 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
787 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
788 %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
789 %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
790 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
791 %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
; Scalar-splat variant for <2 x float> with no mask parameter: vp.merge uses
; the all-ones mask to combine vp.fma(-%a, %vb, -%c) with %c. The expected
; output is an unmasked vfnmacc.vf (no v0.t) under an (e32, mf2, tu, ma)
; vsetvli.
795 define <2 x float> @vfnmacc_vf_v2f32_unmasked(<2 x float> %a, float %b, <2 x float> %c, i32 zeroext %evl) {
796 ; CHECK-LABEL: vfnmacc_vf_v2f32_unmasked:
798 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, tu, ma
799 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8
800 ; CHECK-NEXT: vmv1r.v v8, v9
802 %elt.head = insertelement <2 x float> poison, float %b, i32 0
803 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
804 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
805 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
806 %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
807 %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
808 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
809 %u = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %allones, <2 x float> %v, <2 x float> %c, i32 %evl)
; Vector-vector, vp.select variant for <2 x float>: vp.fma(-%a, %b, -%c)
; under an all-ones mask, then vp.select with %c using %m. The expected
; output is one masked vfnmacc.vv under a tail-agnostic (e32, mf2, ta, mu)
; vsetvli.
813 define <2 x float> @vfnmacc_vv_v2f32_ta(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
814 ; CHECK-LABEL: vfnmacc_vv_v2f32_ta:
816 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
817 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
818 ; CHECK-NEXT: vmv1r.v v8, v10
820 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
821 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
822 %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
823 %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
824 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %b, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
825 %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
; Scalar-splat, vp.select variant for <2 x float>: %b is splatted into %vb,
; vp.fma(-%a, %vb, -%c) runs under an all-ones mask, and vp.select chooses
; between that result and %c using %m. The expected output is one masked
; vfnmacc.vf taking fa0 under an (e32, mf2, ta, mu) vsetvli.
829 define <2 x float> @vfnmacc_vf_v2f32_ta(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
830 ; CHECK-LABEL: vfnmacc_vf_v2f32_ta:
832 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
833 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
834 ; CHECK-NEXT: vmv1r.v v8, v9
836 %elt.head = insertelement <2 x float> poison, float %b, i32 0
837 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
838 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
839 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
840 %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
841 %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
842 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %nega, <2 x float> %vb, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
843 %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
; Scalar-splat, vp.select variant for <2 x float> with the multiplicands
; commuted: vp.fma(%vb, -%a, -%c) under an all-ones mask, then vp.select with
; %c using %m. The expected output is still one masked vfnmacc.vf taking fa0
; under an (e32, mf2, ta, mu) vsetvli.
847 define <2 x float> @vfnmacc_vf_v2f32_commute_ta(<2 x float> %a, float %b, <2 x float> %c, <2 x i1> %m, i32 zeroext %evl) {
848 ; CHECK-LABEL: vfnmacc_vf_v2f32_commute_ta:
850 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu
851 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
852 ; CHECK-NEXT: vmv1r.v v8, v9
854 %elt.head = insertelement <2 x float> poison, float %b, i32 0
855 %vb = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
856 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
857 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
858 %nega = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %a, <2 x i1> %allones, i32 %evl)
859 %negc = call <2 x float> @llvm.vp.fneg.v2f32(<2 x float> %c, <2 x i1> %allones, i32 %evl)
860 %v = call <2 x float> @llvm.vp.fma.v2f32(<2 x float> %vb, <2 x float> %nega, <2 x float> %negc, <2 x i1> %allones, i32 %evl)
861 %u = call <2 x float> @llvm.vp.select.v2f32(<2 x i1> %m, <2 x float> %v, <2 x float> %c, i32 %evl)
; Vector-predicated fma, fneg, merge and select intrinsics for <4 x float>,
; used by the v4f32 test functions that follow.
865 declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)
866 declare <4 x float> @llvm.vp.fneg.v4f32(<4 x float>, <4 x i1>, i32)
867 declare <4 x float> @llvm.vp.merge.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32)
868 declare <4 x float> @llvm.vp.select.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32)
; Vector-vector, vp.merge variant for <4 x float>: vp.fma(-%a, %b, -%c) is
; computed under an all-ones mask and merged with %c under %m. The expected
; output is one masked vfnmacc.vv under a tail-undisturbed
; (e32, m1, tu, mu) vsetvli.
870 define <4 x float> @vfnmacc_vv_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
871 ; CHECK-LABEL: vfnmacc_vv_v4f32:
873 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
874 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
875 ; CHECK-NEXT: vmv1r.v v8, v10
877 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
878 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
879 %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
880 %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
881 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
882 %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
; Vector-vector variant for <4 x float> where vp.merge itself uses the
; all-ones mask; %m is a parameter but is not referenced by the instructions
; shown. The expected output is an unmasked vfnmacc.vv (no v0.t) under an
; (e32, m1, tu, ma) vsetvli.
886 define <4 x float> @vfnmacc_vv_v4f32_unmasked(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
887 ; CHECK-LABEL: vfnmacc_vv_v4f32_unmasked:
889 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
890 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9
891 ; CHECK-NEXT: vmv1r.v v8, v10
893 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
894 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
895 %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
896 %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
897 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
898 %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl)
; Scalar-splat, vp.merge variant for <4 x float>: %b is splatted into %vb and
; used as the second multiplicand of vp.fma(-%a, %vb, -%c); the result is
; merged with %c under %m. The expected output is one masked vfnmacc.vf
; taking fa0 under an (e32, m1, tu, mu) vsetvli.
902 define <4 x float> @vfnmacc_vf_v4f32(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
903 ; CHECK-LABEL: vfnmacc_vf_v4f32:
905 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
906 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
907 ; CHECK-NEXT: vmv1r.v v8, v9
909 %elt.head = insertelement <4 x float> poison, float %b, i32 0
910 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
911 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
912 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
913 %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
914 %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
915 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
916 %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
; Scalar-splat, vp.merge variant for <4 x float> with the multiplicands
; commuted: vp.fma(%vb, -%a, -%c) under an all-ones mask, merged with %c
; under %m. The expected output is still one masked vfnmacc.vf taking fa0
; under an (e32, m1, tu, mu) vsetvli.
920 define <4 x float> @vfnmacc_vf_v4f32_commute(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
921 ; CHECK-LABEL: vfnmacc_vf_v4f32_commute:
923 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, mu
924 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
925 ; CHECK-NEXT: vmv1r.v v8, v9
927 %elt.head = insertelement <4 x float> poison, float %b, i32 0
928 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
929 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
930 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
931 %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
932 %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
933 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
934 %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
; Scalar-splat variant for <4 x float> with no mask parameter: vp.merge uses
; the all-ones mask to combine vp.fma(-%a, %vb, -%c) with %c. The expected
; output is an unmasked vfnmacc.vf (no v0.t) under an (e32, m1, tu, ma)
; vsetvli.
938 define <4 x float> @vfnmacc_vf_v4f32_unmasked(<4 x float> %a, float %b, <4 x float> %c, i32 zeroext %evl) {
939 ; CHECK-LABEL: vfnmacc_vf_v4f32_unmasked:
941 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
942 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8
943 ; CHECK-NEXT: vmv1r.v v8, v9
945 %elt.head = insertelement <4 x float> poison, float %b, i32 0
946 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
947 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
948 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
949 %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
950 %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
951 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
952 %u = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %allones, <4 x float> %v, <4 x float> %c, i32 %evl)
; Vector-vector, vp.select variant for <4 x float>: vp.fma(-%a, %b, -%c)
; under an all-ones mask, then vp.select with %c using %m. The expected
; output is one masked vfnmacc.vv under a tail-agnostic (e32, m1, ta, mu)
; vsetvli, with the result copied to v8 by vmv.v.v.
956 define <4 x float> @vfnmacc_vv_v4f32_ta(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
957 ; CHECK-LABEL: vfnmacc_vv_v4f32_ta:
959 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
960 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
961 ; CHECK-NEXT: vmv.v.v v8, v10
963 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
964 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
965 %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
966 %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
967 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %b, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
968 %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
; Scalar-splat, vp.select variant for <4 x float>: %b is splatted into %vb,
; vp.fma(-%a, %vb, -%c) runs under an all-ones mask, and vp.select chooses
; between that result and %c using %m. The expected output is one masked
; vfnmacc.vf taking fa0 under an (e32, m1, ta, mu) vsetvli.
972 define <4 x float> @vfnmacc_vf_v4f32_ta(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
973 ; CHECK-LABEL: vfnmacc_vf_v4f32_ta:
975 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
976 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
977 ; CHECK-NEXT: vmv.v.v v8, v9
979 %elt.head = insertelement <4 x float> poison, float %b, i32 0
980 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
981 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
982 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
983 %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
984 %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
985 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %nega, <4 x float> %vb, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
986 %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
; Scalar-splat, vp.select variant for <4 x float> with the multiplicands
; commuted: vp.fma(%vb, -%a, -%c) under an all-ones mask, then vp.select with
; %c using %m. The expected output is still one masked vfnmacc.vf taking fa0
; under an (e32, m1, ta, mu) vsetvli.
990 define <4 x float> @vfnmacc_vf_v4f32_commute_ta(<4 x float> %a, float %b, <4 x float> %c, <4 x i1> %m, i32 zeroext %evl) {
991 ; CHECK-LABEL: vfnmacc_vf_v4f32_commute_ta:
993 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu
994 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
995 ; CHECK-NEXT: vmv.v.v v8, v9
997 %elt.head = insertelement <4 x float> poison, float %b, i32 0
998 %vb = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
999 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1000 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1001 %nega = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a, <4 x i1> %allones, i32 %evl)
1002 %negc = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %c, <4 x i1> %allones, i32 %evl)
1003 %v = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %vb, <4 x float> %nega, <4 x float> %negc, <4 x i1> %allones, i32 %evl)
1004 %u = call <4 x float> @llvm.vp.select.v4f32(<4 x i1> %m, <4 x float> %v, <4 x float> %c, i32 %evl)
; Vector-predicated fma, fneg, merge and select intrinsics for <8 x float>,
; used by the v8f32 test functions that follow.
1008 declare <8 x float> @llvm.vp.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, <8 x i1>, i32)
1009 declare <8 x float> @llvm.vp.fneg.v8f32(<8 x float>, <8 x i1>, i32)
1010 declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32)
1011 declare <8 x float> @llvm.vp.select.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32)
; Vector-vector, vp.merge variant for <8 x float>: vp.fma(-%a, %b, -%c) is
; computed under an all-ones mask and merged with %c under %m. The expected
; output is one masked vfnmacc.vv under a tail-undisturbed
; (e32, m2, tu, mu) vsetvli.
1013 define <8 x float> @vfnmacc_vv_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
1014 ; CHECK-LABEL: vfnmacc_vv_v8f32:
1016 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu
1017 ; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t
1018 ; CHECK-NEXT: vmv2r.v v8, v12
1020 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1021 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1022 %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
1023 %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
1024 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
1025 %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
; Vector-vector variant for <8 x float> where vp.merge itself uses the
; all-ones mask; %m is a parameter but is not referenced by the instructions
; shown. The expected output is an unmasked vfnmacc.vv (no v0.t) under an
; (e32, m2, tu, ma) vsetvli.
1029 define <8 x float> @vfnmacc_vv_v8f32_unmasked(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
1030 ; CHECK-LABEL: vfnmacc_vv_v8f32_unmasked:
1032 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
1033 ; CHECK-NEXT: vfnmacc.vv v12, v8, v10
1034 ; CHECK-NEXT: vmv2r.v v8, v12
1036 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1037 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1038 %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
1039 %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
1040 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
1041 %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl)
; Scalar-splat, vp.merge variant for <8 x float>: %b is splatted into %vb and
; used as the second multiplicand of vp.fma(-%a, %vb, -%c); the result is
; merged with %c under %m. The expected output is one masked vfnmacc.vf
; taking fa0 under an (e32, m2, tu, mu) vsetvli.
1045 define <8 x float> @vfnmacc_vf_v8f32(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
1046 ; CHECK-LABEL: vfnmacc_vf_v8f32:
1048 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu
1049 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
1050 ; CHECK-NEXT: vmv2r.v v8, v10
1052 %elt.head = insertelement <8 x float> poison, float %b, i32 0
1053 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
1054 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1055 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1056 %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
1057 %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
1058 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
1059 %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
; Scalar-splat, vp.merge variant for <8 x float> with the multiplicands
; commuted: vp.fma(%vb, -%a, -%c) under an all-ones mask, merged with %c
; under %m. The expected output is still one masked vfnmacc.vf taking fa0
; under an (e32, m2, tu, mu) vsetvli.
1063 define <8 x float> @vfnmacc_vf_v8f32_commute(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
1064 ; CHECK-LABEL: vfnmacc_vf_v8f32_commute:
1066 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, mu
1067 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
1068 ; CHECK-NEXT: vmv2r.v v8, v10
1070 %elt.head = insertelement <8 x float> poison, float %b, i32 0
1071 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
1072 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1073 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1074 %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
1075 %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
1076 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
1077 %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
; Scalar-splat variant for <8 x float> with no mask parameter: vp.merge uses
; the all-ones mask to combine vp.fma(-%a, %vb, -%c) with %c. The expected
; output is an unmasked vfnmacc.vf (no v0.t) under an (e32, m2, tu, ma)
; vsetvli.
1081 define <8 x float> @vfnmacc_vf_v8f32_unmasked(<8 x float> %a, float %b, <8 x float> %c, i32 zeroext %evl) {
1082 ; CHECK-LABEL: vfnmacc_vf_v8f32_unmasked:
1084 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
1085 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8
1086 ; CHECK-NEXT: vmv2r.v v8, v10
1088 %elt.head = insertelement <8 x float> poison, float %b, i32 0
1089 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
1090 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1091 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1092 %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
1093 %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
1094 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
1095 %u = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %allones, <8 x float> %v, <8 x float> %c, i32 %evl)
; Vector-vector, vp.select variant for <8 x float>: vp.fma(-%a, %b, -%c)
; under an all-ones mask, then vp.select with %c using %m. The expected
; output is one masked vfnmacc.vv under a tail-agnostic (e32, m2, ta, mu)
; vsetvli.
1099 define <8 x float> @vfnmacc_vv_v8f32_ta(<8 x float> %a, <8 x float> %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
1100 ; CHECK-LABEL: vfnmacc_vv_v8f32_ta:
1102 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
1103 ; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t
1104 ; CHECK-NEXT: vmv.v.v v8, v12
1106 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1107 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1108 %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
1109 %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
1110 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %b, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
1111 %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
; Scalar-splat, vp.select variant for <8 x float>: %b is splatted into %vb,
; vp.fma(-%a, %vb, -%c) runs under an all-ones mask, and vp.select chooses
; between that result and %c using %m. The expected output is one masked
; vfnmacc.vf taking fa0 under an (e32, m2, ta, mu) vsetvli.
1115 define <8 x float> @vfnmacc_vf_v8f32_ta(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
1116 ; CHECK-LABEL: vfnmacc_vf_v8f32_ta:
1118 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
1119 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
1120 ; CHECK-NEXT: vmv.v.v v8, v10
1122 %elt.head = insertelement <8 x float> poison, float %b, i32 0
1123 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
1124 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1125 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1126 %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
1127 %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
1128 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %nega, <8 x float> %vb, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
1129 %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
; Scalar-splat, vp.select variant for <8 x float> with the multiplicands
; commuted: vp.fma(%vb, -%a, -%c) under an all-ones mask, then vp.select with
; %c using %m. The expected output is still one masked vfnmacc.vf taking fa0
; under an (e32, m2, ta, mu) vsetvli.
1133 define <8 x float> @vfnmacc_vf_v8f32_commute_ta(<8 x float> %a, float %b, <8 x float> %c, <8 x i1> %m, i32 zeroext %evl) {
1134 ; CHECK-LABEL: vfnmacc_vf_v8f32_commute_ta:
1136 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu
1137 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
1138 ; CHECK-NEXT: vmv.v.v v8, v10
1140 %elt.head = insertelement <8 x float> poison, float %b, i32 0
1141 %vb = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
1142 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1143 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1144 %nega = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %a, <8 x i1> %allones, i32 %evl)
1145 %negc = call <8 x float> @llvm.vp.fneg.v8f32(<8 x float> %c, <8 x i1> %allones, i32 %evl)
1146 %v = call <8 x float> @llvm.vp.fma.v8f32(<8 x float> %vb, <8 x float> %nega, <8 x float> %negc, <8 x i1> %allones, i32 %evl)
1147 %u = call <8 x float> @llvm.vp.select.v8f32(<8 x i1> %m, <8 x float> %v, <8 x float> %c, i32 %evl)
; Vector-predicated intrinsics (fma, fneg, merge, select) for <16 x float>,
; each taking a <16 x i1> mask and an i32 explicit vector length.
1151 declare <16 x float> @llvm.vp.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, <16 x i1>, i32)
1152 declare <16 x float> @llvm.vp.fneg.v16f32(<16 x float>, <16 x i1>, i32)
1153 declare <16 x float> @llvm.vp.merge.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32)
1154 declare <16 x float> @llvm.vp.select.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32)
; Masked vector-vector case for <16 x float>: fma(-%a, %b, -%c) is computed
; under an all-ones mask, then vp.merge picks between that result and %c under
; %m. The expected assembly is a masked vfnmacc.vv under a "tu, mu" vsetvli
; (e32, m4), then a whole-register copy into v8.
1156 define <16 x float> @vfnmacc_vv_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1157 ; CHECK-LABEL: vfnmacc_vv_v16f32:
1159 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu
1160 ; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t
1161 ; CHECK-NEXT: vmv4r.v v8, v16
1163 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1164 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1165 %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
1166 %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl)
1167 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl)
1168 %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
; Unmasked vector-vector case for <16 x float>: vp.merge is given the all-ones
; mask, so the %m parameter is declared but never used. The expected assembly
; is an unmasked vfnmacc.vv under a "tu, ma" vsetvli, then a whole-register
; copy into v8.
1172 define <16 x float> @vfnmacc_vv_v16f32_unmasked(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1173 ; CHECK-LABEL: vfnmacc_vv_v16f32_unmasked:
1175 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
1176 ; CHECK-NEXT: vfnmacc.vv v16, v8, v12
1177 ; CHECK-NEXT: vmv4r.v v8, v16
1179 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1180 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1181 %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
1182 %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl)
1183 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl)
1184 %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl)
; Masked vector-scalar case for <16 x float>: scalar %b is splatted into %vb
; (insertelement + shufflevector), fma(-%a, %vb, -%c) is computed under an
; all-ones mask, and vp.merge picks between that result and %c under %m. The
; expected assembly is a masked vfnmacc.vf reading fa0 under "tu, mu".
1188 define <16 x float> @vfnmacc_vf_v16f32(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1189 ; CHECK-LABEL: vfnmacc_vf_v16f32:
1191 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu
1192 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
1193 ; CHECK-NEXT: vmv4r.v v8, v12
1195 %elt.head = insertelement <16 x float> poison, float %b, i32 0
1196 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
1197 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1198 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1199 %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
1200 %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl)
1201 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl)
1202 %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
; Commuted masked vector-scalar case for <16 x float>: the multiplicands are
; swapped, i.e. fma(%vb, -%a, -%c), before the vp.merge under %m. The expected
; assembly is still a masked vfnmacc.vf reading fa0 under "tu, mu".
1206 define <16 x float> @vfnmacc_vf_v16f32_commute(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1207 ; CHECK-LABEL: vfnmacc_vf_v16f32_commute:
1209 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, mu
1210 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
1211 ; CHECK-NEXT: vmv4r.v v8, v12
1213 %elt.head = insertelement <16 x float> poison, float %b, i32 0
1214 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
1215 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1216 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1217 %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
1218 %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl)
1219 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %negc, <16 x i1> %allones, i32 %evl)
1220 %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
; Unmasked vector-scalar case for <16 x float>: the function takes no mask
; parameter and vp.merge is given the all-ones mask. The expected assembly is
; an unmasked vfnmacc.vf reading fa0 under a "tu, ma" vsetvli.
1224 define <16 x float> @vfnmacc_vf_v16f32_unmasked(<16 x float> %a, float %b, <16 x float> %c, i32 zeroext %evl) {
1225 ; CHECK-LABEL: vfnmacc_vf_v16f32_unmasked:
1227 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma
1228 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8
1229 ; CHECK-NEXT: vmv4r.v v8, v12
1231 %elt.head = insertelement <16 x float> poison, float %b, i32 0
1232 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
1233 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1234 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1235 %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
1236 %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl)
1237 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl)
1238 %u = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %allones, <16 x float> %v, <16 x float> %c, i32 %evl)
; Masked vector-vector case for <16 x float> using vp.select instead of
; vp.merge: fma(-%a, %b, -%c) is selected against %c under %m. The expected
; assembly is a masked vfnmacc.vv under a "ta, mu" vsetvli, then vmv.v.v into
; v8.
1242 define <16 x float> @vfnmacc_vv_v16f32_ta(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1243 ; CHECK-LABEL: vfnmacc_vv_v16f32_ta:
1245 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
1246 ; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t
1247 ; CHECK-NEXT: vmv.v.v v8, v16
1249 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1250 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1251 %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
1252 %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl)
1253 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %b, <16 x float> %negc, <16 x i1> %allones, i32 %evl)
1254 %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
; Masked vector-scalar case for <16 x float> using vp.select: scalar %b is
; splatted into %vb and fma(-%a, %vb, -%c) is selected against %c under %m.
; The expected assembly is a masked vfnmacc.vf under a "ta, mu" vsetvli, then
; vmv.v.v into v8.
1258 define <16 x float> @vfnmacc_vf_v16f32_ta(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1259 ; CHECK-LABEL: vfnmacc_vf_v16f32_ta:
1261 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
1262 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
1263 ; CHECK-NEXT: vmv.v.v v8, v12
1265 %elt.head = insertelement <16 x float> poison, float %b, i32 0
1266 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
1267 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1268 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1269 %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
1270 %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl)
1271 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %nega, <16 x float> %vb, <16 x float> %negc, <16 x i1> %allones, i32 %evl)
1272 %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
; Commuted vp.select variant for <16 x float>: the multiplicands are swapped,
; i.e. fma(%vb, -%a, -%c), before the vp.select under %m. The expected assembly
; is still a masked vfnmacc.vf under a "ta, mu" vsetvli, then vmv.v.v into v8.
1276 define <16 x float> @vfnmacc_vf_v16f32_commute_ta(<16 x float> %a, float %b, <16 x float> %c, <16 x i1> %m, i32 zeroext %evl) {
1277 ; CHECK-LABEL: vfnmacc_vf_v16f32_commute_ta:
1279 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu
1280 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
1281 ; CHECK-NEXT: vmv.v.v v8, v12
1283 %elt.head = insertelement <16 x float> poison, float %b, i32 0
1284 %vb = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
1285 %splat = insertelement <16 x i1> poison, i1 -1, i32 0
1286 %allones = shufflevector <16 x i1> %splat, <16 x i1> poison, <16 x i32> zeroinitializer
1287 %nega = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %a, <16 x i1> %allones, i32 %evl)
1288 %negc = call <16 x float> @llvm.vp.fneg.v16f32(<16 x float> %c, <16 x i1> %allones, i32 %evl)
1289 %v = call <16 x float> @llvm.vp.fma.v16f32(<16 x float> %vb, <16 x float> %nega, <16 x float> %negc, <16 x i1> %allones, i32 %evl)
1290 %u = call <16 x float> @llvm.vp.select.v16f32(<16 x i1> %m, <16 x float> %v, <16 x float> %c, i32 %evl)
; Vector-predicated intrinsics (fma, fneg, merge, select) for <2 x double>,
; each taking a <2 x i1> mask and an i32 explicit vector length.
1294 declare <2 x double> @llvm.vp.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, <2 x i1>, i32)
1295 declare <2 x double> @llvm.vp.fneg.v2f64(<2 x double>, <2 x i1>, i32)
1296 declare <2 x double> @llvm.vp.merge.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32)
1297 declare <2 x double> @llvm.vp.select.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32)
; Masked vector-vector case for <2 x double>: fma(-%a, %b, -%c) is computed
; under an all-ones mask, then vp.merge picks between that result and %c under
; %m. The expected assembly is a masked vfnmacc.vv under a "tu, mu" vsetvli
; (e64, m1), then a whole-register copy into v8.
1299 define <2 x double> @vfnmacc_vv_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1300 ; CHECK-LABEL: vfnmacc_vv_v2f64:
1302 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu
1303 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
1304 ; CHECK-NEXT: vmv1r.v v8, v10
1306 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1307 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1308 %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
1309 %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl)
1310 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl)
1311 %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
; Unmasked vector-vector case for <2 x double>: vp.merge is given the all-ones
; mask, so the %m parameter is declared but never used. The expected assembly
; is an unmasked vfnmacc.vv under a "tu, ma" vsetvli, then a whole-register
; copy into v8.
1315 define <2 x double> @vfnmacc_vv_v2f64_unmasked(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1316 ; CHECK-LABEL: vfnmacc_vv_v2f64_unmasked:
1318 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
1319 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9
1320 ; CHECK-NEXT: vmv1r.v v8, v10
1322 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1323 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1324 %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
1325 %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl)
1326 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl)
1327 %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl)
; Masked vector-scalar case for <2 x double>: scalar %b is splatted into %vb
; (insertelement + shufflevector), fma(-%a, %vb, -%c) is computed under an
; all-ones mask, and vp.merge picks between that result and %c under %m. The
; expected assembly is a masked vfnmacc.vf reading fa0 under "tu, mu".
1331 define <2 x double> @vfnmacc_vf_v2f64(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1332 ; CHECK-LABEL: vfnmacc_vf_v2f64:
1334 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu
1335 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
1336 ; CHECK-NEXT: vmv1r.v v8, v9
1338 %elt.head = insertelement <2 x double> poison, double %b, i32 0
1339 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
1340 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1341 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1342 %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
1343 %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl)
1344 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl)
1345 %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
; Commuted masked vector-scalar case for <2 x double>: the multiplicands are
; swapped, i.e. fma(%vb, -%a, -%c), before the vp.merge under %m. The expected
; assembly is still a masked vfnmacc.vf reading fa0 under "tu, mu".
1349 define <2 x double> @vfnmacc_vf_v2f64_commute(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1350 ; CHECK-LABEL: vfnmacc_vf_v2f64_commute:
1352 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, mu
1353 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
1354 ; CHECK-NEXT: vmv1r.v v8, v9
1356 %elt.head = insertelement <2 x double> poison, double %b, i32 0
1357 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
1358 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1359 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1360 %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
1361 %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl)
1362 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %negc, <2 x i1> %allones, i32 %evl)
1363 %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
; Unmasked vector-scalar case for <2 x double>: the function takes no mask
; parameter and vp.merge is given the all-ones mask. The expected assembly is
; an unmasked vfnmacc.vf reading fa0 under a "tu, ma" vsetvli.
1367 define <2 x double> @vfnmacc_vf_v2f64_unmasked(<2 x double> %a, double %b, <2 x double> %c, i32 zeroext %evl) {
1368 ; CHECK-LABEL: vfnmacc_vf_v2f64_unmasked:
1370 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, tu, ma
1371 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8
1372 ; CHECK-NEXT: vmv1r.v v8, v9
1374 %elt.head = insertelement <2 x double> poison, double %b, i32 0
1375 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
1376 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1377 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1378 %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
1379 %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl)
1380 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl)
1381 %u = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %allones, <2 x double> %v, <2 x double> %c, i32 %evl)
; Masked vector-vector case for <2 x double> using vp.select instead of
; vp.merge: fma(-%a, %b, -%c) is selected against %c under %m. The expected
; assembly is a masked vfnmacc.vv under a "ta, mu" vsetvli, then vmv.v.v into
; v8.
1385 define <2 x double> @vfnmacc_vv_v2f64_ta(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1386 ; CHECK-LABEL: vfnmacc_vv_v2f64_ta:
1388 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
1389 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9, v0.t
1390 ; CHECK-NEXT: vmv.v.v v8, v10
1392 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1393 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1394 %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
1395 %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl)
1396 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %b, <2 x double> %negc, <2 x i1> %allones, i32 %evl)
1397 %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
; Masked vector-scalar case for <2 x double> using vp.select: scalar %b is
; splatted into %vb and fma(-%a, %vb, -%c) is selected against %c under %m.
; The expected assembly is a masked vfnmacc.vf under a "ta, mu" vsetvli, then
; vmv.v.v into v8.
1401 define <2 x double> @vfnmacc_vf_v2f64_ta(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1402 ; CHECK-LABEL: vfnmacc_vf_v2f64_ta:
1404 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
1405 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
1406 ; CHECK-NEXT: vmv.v.v v8, v9
1408 %elt.head = insertelement <2 x double> poison, double %b, i32 0
1409 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
1410 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1411 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1412 %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
1413 %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl)
1414 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %nega, <2 x double> %vb, <2 x double> %negc, <2 x i1> %allones, i32 %evl)
1415 %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
; Commuted vp.select variant for <2 x double>: the multiplicands are swapped,
; i.e. fma(%vb, -%a, -%c), before the vp.select under %m. The expected assembly
; is still a masked vfnmacc.vf under a "ta, mu" vsetvli, then vmv.v.v into v8.
1419 define <2 x double> @vfnmacc_vf_v2f64_commute_ta(<2 x double> %a, double %b, <2 x double> %c, <2 x i1> %m, i32 zeroext %evl) {
1420 ; CHECK-LABEL: vfnmacc_vf_v2f64_commute_ta:
1422 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
1423 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8, v0.t
1424 ; CHECK-NEXT: vmv.v.v v8, v9
1426 %elt.head = insertelement <2 x double> poison, double %b, i32 0
1427 %vb = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
1428 %splat = insertelement <2 x i1> poison, i1 -1, i32 0
1429 %allones = shufflevector <2 x i1> %splat, <2 x i1> poison, <2 x i32> zeroinitializer
1430 %nega = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %a, <2 x i1> %allones, i32 %evl)
1431 %negc = call <2 x double> @llvm.vp.fneg.v2f64(<2 x double> %c, <2 x i1> %allones, i32 %evl)
1432 %v = call <2 x double> @llvm.vp.fma.v2f64(<2 x double> %vb, <2 x double> %nega, <2 x double> %negc, <2 x i1> %allones, i32 %evl)
1433 %u = call <2 x double> @llvm.vp.select.v2f64(<2 x i1> %m, <2 x double> %v, <2 x double> %c, i32 %evl)
; Vector-predicated intrinsics (fma, fneg, merge, select) for <4 x double>,
; each taking a <4 x i1> mask and an i32 explicit vector length.
1437 declare <4 x double> @llvm.vp.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, <4 x i1>, i32)
1438 declare <4 x double> @llvm.vp.fneg.v4f64(<4 x double>, <4 x i1>, i32)
1439 declare <4 x double> @llvm.vp.merge.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32)
1440 declare <4 x double> @llvm.vp.select.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32)
; Masked vector-vector case for <4 x double>: fma(-%a, %b, -%c) is computed
; under an all-ones mask, then vp.merge picks between that result and %c under
; %m. The expected assembly is a masked vfnmacc.vv under a "tu, mu" vsetvli
; (e64, m2), then a whole-register copy into v8.
1442 define <4 x double> @vfnmacc_vv_v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1443 ; CHECK-LABEL: vfnmacc_vv_v4f64:
1445 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu
1446 ; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t
1447 ; CHECK-NEXT: vmv2r.v v8, v12
1449 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1450 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1451 %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl)
1452 %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl)
1453 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl)
1454 %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
; Unmasked vector-vector case for <4 x double>: vp.merge is given the all-ones
; mask, so the %m parameter is declared but never used. The expected assembly
; is an unmasked vfnmacc.vv under a "tu, ma" vsetvli, then a whole-register
; copy into v8.
1458 define <4 x double> @vfnmacc_vv_v4f64_unmasked(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1459 ; CHECK-LABEL: vfnmacc_vv_v4f64_unmasked:
1461 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
1462 ; CHECK-NEXT: vfnmacc.vv v12, v8, v10
1463 ; CHECK-NEXT: vmv2r.v v8, v12
1465 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1466 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1467 %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl)
1468 %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl)
1469 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl)
1470 %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl)
; Masked vector-scalar case for <4 x double>: scalar %b is splatted into %vb
; (insertelement + shufflevector), fma(-%a, %vb, -%c) is computed under an
; all-ones mask, and vp.merge picks between that result and %c under %m. The
; expected assembly is a masked vfnmacc.vf reading fa0 under "tu, mu".
1474 define <4 x double> @vfnmacc_vf_v4f64(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1475 ; CHECK-LABEL: vfnmacc_vf_v4f64:
1477 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu
1478 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
1479 ; CHECK-NEXT: vmv2r.v v8, v10
1481 %elt.head = insertelement <4 x double> poison, double %b, i32 0
1482 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
1483 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1484 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1485 %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl)
1486 %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl)
1487 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl)
1488 %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
; Commuted masked vector-scalar case for <4 x double>: the multiplicands are
; swapped, i.e. fma(%vb, -%a, -%c), before the vp.merge under %m. The expected
; assembly is still a masked vfnmacc.vf reading fa0 under "tu, mu".
1492 define <4 x double> @vfnmacc_vf_v4f64_commute(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1493 ; CHECK-LABEL: vfnmacc_vf_v4f64_commute:
1495 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, mu
1496 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
1497 ; CHECK-NEXT: vmv2r.v v8, v10
1499 %elt.head = insertelement <4 x double> poison, double %b, i32 0
1500 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
1501 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1502 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1503 %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl)
1504 %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl)
1505 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %negc, <4 x i1> %allones, i32 %evl)
1506 %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
; Unmasked vector-scalar case for <4 x double>: the function takes no mask
; parameter and vp.merge is given the all-ones mask. The expected assembly is
; an unmasked vfnmacc.vf reading fa0 under a "tu, ma" vsetvli.
1510 define <4 x double> @vfnmacc_vf_v4f64_unmasked(<4 x double> %a, double %b, <4 x double> %c, i32 zeroext %evl) {
1511 ; CHECK-LABEL: vfnmacc_vf_v4f64_unmasked:
1513 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma
1514 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8
1515 ; CHECK-NEXT: vmv2r.v v8, v10
1517 %elt.head = insertelement <4 x double> poison, double %b, i32 0
1518 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
1519 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1520 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1521 %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl)
1522 %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl)
1523 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl)
1524 %u = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %allones, <4 x double> %v, <4 x double> %c, i32 %evl)
; Masked vector-vector case for <4 x double> using vp.select instead of
; vp.merge: fma(-%a, %b, -%c) is selected against %c under %m. The expected
; assembly is a masked vfnmacc.vv under a "ta, mu" vsetvli, then vmv.v.v into
; v8.
1528 define <4 x double> @vfnmacc_vv_v4f64_ta(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1529 ; CHECK-LABEL: vfnmacc_vv_v4f64_ta:
1531 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
1532 ; CHECK-NEXT: vfnmacc.vv v12, v8, v10, v0.t
1533 ; CHECK-NEXT: vmv.v.v v8, v12
1535 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1536 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1537 %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl)
1538 %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl)
1539 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %b, <4 x double> %negc, <4 x i1> %allones, i32 %evl)
1540 %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
; Masked vector-scalar case for <4 x double> using vp.select: scalar %b is
; splatted into %vb and fma(-%a, %vb, -%c) is selected against %c under %m.
; The expected assembly is a masked vfnmacc.vf under a "ta, mu" vsetvli, then
; vmv.v.v into v8.
1544 define <4 x double> @vfnmacc_vf_v4f64_ta(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1545 ; CHECK-LABEL: vfnmacc_vf_v4f64_ta:
1547 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
1548 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
1549 ; CHECK-NEXT: vmv.v.v v8, v10
1551 %elt.head = insertelement <4 x double> poison, double %b, i32 0
1552 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
1553 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1554 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1555 %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl)
1556 %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl)
1557 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %nega, <4 x double> %vb, <4 x double> %negc, <4 x i1> %allones, i32 %evl)
1558 %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
; Commuted vp.select variant for <4 x double>: the multiplicands are swapped,
; i.e. fma(%vb, -%a, -%c), before the vp.select under %m. The expected assembly
; is still a masked vfnmacc.vf under a "ta, mu" vsetvli, then vmv.v.v into v8.
1562 define <4 x double> @vfnmacc_vf_v4f64_commute_ta(<4 x double> %a, double %b, <4 x double> %c, <4 x i1> %m, i32 zeroext %evl) {
1563 ; CHECK-LABEL: vfnmacc_vf_v4f64_commute_ta:
1565 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
1566 ; CHECK-NEXT: vfnmacc.vf v10, fa0, v8, v0.t
1567 ; CHECK-NEXT: vmv.v.v v8, v10
1569 %elt.head = insertelement <4 x double> poison, double %b, i32 0
1570 %vb = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
1571 %splat = insertelement <4 x i1> poison, i1 -1, i32 0
1572 %allones = shufflevector <4 x i1> %splat, <4 x i1> poison, <4 x i32> zeroinitializer
1573 %nega = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %a, <4 x i1> %allones, i32 %evl)
1574 %negc = call <4 x double> @llvm.vp.fneg.v4f64(<4 x double> %c, <4 x i1> %allones, i32 %evl)
1575 %v = call <4 x double> @llvm.vp.fma.v4f64(<4 x double> %vb, <4 x double> %nega, <4 x double> %negc, <4 x i1> %allones, i32 %evl)
1576 %u = call <4 x double> @llvm.vp.select.v4f64(<4 x i1> %m, <4 x double> %v, <4 x double> %c, i32 %evl)
; Vector-predicated intrinsics (fma, fneg, merge, select) for <8 x double>,
; each taking a <8 x i1> mask and an i32 explicit vector length.
1580 declare <8 x double> @llvm.vp.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, <8 x i1>, i32)
1581 declare <8 x double> @llvm.vp.fneg.v8f64(<8 x double>, <8 x i1>, i32)
1582 declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32)
1583 declare <8 x double> @llvm.vp.select.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32)
; Masked vector-vector case for <8 x double>: fma(-%a, %b, -%c) is computed
; under an all-ones mask, then vp.merge picks between that result and %c under
; %m. The expected assembly is a masked vfnmacc.vv under a "tu, mu" vsetvli
; (e64, m4), then a whole-register copy into v8.
1585 define <8 x double> @vfnmacc_vv_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1586 ; CHECK-LABEL: vfnmacc_vv_v8f64:
1588 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu
1589 ; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t
1590 ; CHECK-NEXT: vmv4r.v v8, v16
1592 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1593 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1594 %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl)
1595 %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl)
1596 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl)
1597 %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
; Unmasked vector-vector case for <8 x double>: vp.merge is given the all-ones
; mask, so the %m parameter is declared but never used. The expected assembly
; is an unmasked vfnmacc.vv under a "tu, ma" vsetvli, then a whole-register
; copy into v8.
1601 define <8 x double> @vfnmacc_vv_v8f64_unmasked(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1602 ; CHECK-LABEL: vfnmacc_vv_v8f64_unmasked:
1604 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
1605 ; CHECK-NEXT: vfnmacc.vv v16, v8, v12
1606 ; CHECK-NEXT: vmv4r.v v8, v16
1608 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1609 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1610 %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl)
1611 %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl)
1612 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl)
1613 %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl)
; Masked vector-scalar case for <8 x double>: scalar %b is splatted into %vb
; (insertelement + shufflevector), fma(-%a, %vb, -%c) is computed under an
; all-ones mask, and vp.merge picks between that result and %c under %m. The
; expected assembly is a masked vfnmacc.vf reading fa0 under "tu, mu".
1617 define <8 x double> @vfnmacc_vf_v8f64(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1618 ; CHECK-LABEL: vfnmacc_vf_v8f64:
1620 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu
1621 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
1622 ; CHECK-NEXT: vmv4r.v v8, v12
1624 %elt.head = insertelement <8 x double> poison, double %b, i32 0
1625 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
1626 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1627 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1628 %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl)
1629 %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl)
1630 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl)
1631 %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
; Commuted masked vector-scalar case for <8 x double>: the multiplicands are
; swapped, i.e. fma(%vb, -%a, -%c), before the vp.merge under %m. The expected
; assembly is still a masked vfnmacc.vf reading fa0 under "tu, mu".
1635 define <8 x double> @vfnmacc_vf_v8f64_commute(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1636 ; CHECK-LABEL: vfnmacc_vf_v8f64_commute:
1638 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, mu
1639 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
1640 ; CHECK-NEXT: vmv4r.v v8, v12
1642 %elt.head = insertelement <8 x double> poison, double %b, i32 0
1643 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
1644 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1645 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1646 %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl)
1647 %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl)
1648 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %negc, <8 x i1> %allones, i32 %evl)
1649 %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
; Unmasked vector-scalar case for <8 x double>: the function takes no mask
; parameter and vp.merge is given the all-ones mask. The expected assembly is
; an unmasked vfnmacc.vf reading fa0 under a "tu, ma" vsetvli.
1653 define <8 x double> @vfnmacc_vf_v8f64_unmasked(<8 x double> %a, double %b, <8 x double> %c, i32 zeroext %evl) {
1654 ; CHECK-LABEL: vfnmacc_vf_v8f64_unmasked:
1656 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma
1657 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8
1658 ; CHECK-NEXT: vmv4r.v v8, v12
1660 %elt.head = insertelement <8 x double> poison, double %b, i32 0
1661 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
1662 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1663 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1664 %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl)
1665 %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl)
1666 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl)
1667 %u = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %allones, <8 x double> %v, <8 x double> %c, i32 %evl)
; Masked vector-vector case for <8 x double> using vp.select instead of
; vp.merge: fma(-%a, %b, -%c) is selected against %c under %m. The expected
; assembly is a masked vfnmacc.vv under a "ta, mu" vsetvli, then vmv.v.v into
; v8.
1671 define <8 x double> @vfnmacc_vv_v8f64_ta(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1672 ; CHECK-LABEL: vfnmacc_vv_v8f64_ta:
1674 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
1675 ; CHECK-NEXT: vfnmacc.vv v16, v8, v12, v0.t
1676 ; CHECK-NEXT: vmv.v.v v8, v16
1678 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1679 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1680 %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl)
1681 %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl)
1682 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %b, <8 x double> %negc, <8 x i1> %allones, i32 %evl)
1683 %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
; Masked vector-scalar case for <8 x double> using vp.select: scalar %b is
; splatted into %vb and fma(-%a, %vb, -%c) is selected against %c under %m.
; The expected assembly is a masked vfnmacc.vf under a "ta, mu" vsetvli, then
; vmv.v.v into v8.
1687 define <8 x double> @vfnmacc_vf_v8f64_ta(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1688 ; CHECK-LABEL: vfnmacc_vf_v8f64_ta:
1690 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
1691 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
1692 ; CHECK-NEXT: vmv.v.v v8, v12
1694 %elt.head = insertelement <8 x double> poison, double %b, i32 0
1695 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
1696 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1697 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1698 %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl)
1699 %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl)
1700 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %nega, <8 x double> %vb, <8 x double> %negc, <8 x i1> %allones, i32 %evl)
1701 %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)
; Commuted vp.select variant for <8 x double>: the multiplicands are swapped,
; i.e. fma(%vb, -%a, -%c), before the vp.select under %m. The expected assembly
; is still a masked vfnmacc.vf under a "ta, mu" vsetvli, then vmv.v.v into v8.
1705 define <8 x double> @vfnmacc_vf_v8f64_commute_ta(<8 x double> %a, double %b, <8 x double> %c, <8 x i1> %m, i32 zeroext %evl) {
1706 ; CHECK-LABEL: vfnmacc_vf_v8f64_commute_ta:
1708 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
1709 ; CHECK-NEXT: vfnmacc.vf v12, fa0, v8, v0.t
1710 ; CHECK-NEXT: vmv.v.v v8, v12
1712 %elt.head = insertelement <8 x double> poison, double %b, i32 0
1713 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
1714 %splat = insertelement <8 x i1> poison, i1 -1, i32 0
1715 %allones = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
1716 %nega = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %a, <8 x i1> %allones, i32 %evl)
1717 %negc = call <8 x double> @llvm.vp.fneg.v8f64(<8 x double> %c, <8 x i1> %allones, i32 %evl)
1718 %v = call <8 x double> @llvm.vp.fma.v8f64(<8 x double> %vb, <8 x double> %nega, <8 x double> %negc, <8 x i1> %allones, i32 %evl)
1719 %u = call <8 x double> @llvm.vp.select.v8f64(<8 x i1> %m, <8 x double> %v, <8 x double> %c, i32 %evl)