; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

; This tests a mix of vfmacc and vfmadd by using different operand orders to
; trigger commuting in TwoAddressInstructionPass.
declare <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x half>)

; fma operands: (%va, %vb, %vc).
define <vscale x 1 x half> @vfmadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v10, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x half> %vc)
  ret <vscale x 1 x half> %vd
}

; fma operands: (%va, %splat, %vb).
define <vscale x 1 x half> @vfmadd_vf_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv1f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv1f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 1 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x half> @llvm.fma.v1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %splat, <vscale x 1 x half> %vb)
  ret <vscale x 1 x half> %vd
}

declare <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)

; fma operands: (%va, %vc, %vb).
define <vscale x 2 x half> @vfmadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v10, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v12, v9, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v12
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vc, <vscale x 2 x half> %vb)
  ret <vscale x 2 x half> %vd
}

; fma operands: (%vb, %splat, %va).
define <vscale x 2 x half> @vfmadd_vf_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv2f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv2f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 2 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x half> @llvm.fma.v2f16(<vscale x 2 x half> %vb, <vscale x 2 x half> %splat, <vscale x 2 x half> %va)
  ret <vscale x 2 x half> %vd
}

declare <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)

; fma operands: (%vb, %va, %vc).
define <vscale x 4 x half> @vfmadd_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v9, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v14, v10, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v14
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> %vb, <vscale x 4 x half> %va, <vscale x 4 x half> %vc)
  ret <vscale x 4 x half> %vd
}

; fma operands: (%va, %splat, %vb).
define <vscale x 4 x half> @vfmadd_vf_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv4f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v9
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv4f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v12, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v14, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v14, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 4 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x half> @llvm.fma.v4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %splat, <vscale x 4 x half> %vb)
  ret <vscale x 4 x half> %vd
}

declare <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)

; fma operands: (%vb, %vc, %va).
define <vscale x 8 x half> @vfmadd_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfmacc.vv v8, v12, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v20, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %vc, <vscale x 8 x half> %va)
  ret <vscale x 8 x half> %vd
}

; fma operands: (%vb, %splat, %va).
define <vscale x 8 x half> @vfmadd_vf_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v10
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v12, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v12
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v20, v10
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v24, v20, v12
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v24
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 8 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x half> @llvm.fma.v8f16(<vscale x 8 x half> %vb, <vscale x 8 x half> %splat, <vscale x 8 x half> %va)
  ret <vscale x 8 x half> %vd
}

declare <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half>, <vscale x 16 x half>, <vscale x 16 x half>)

; fma operands: (%vc, %va, %vb).
define <vscale x 16 x half> @vfmadd_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfmadd.vv v8, v16, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> %vc, <vscale x 16 x half> %va, <vscale x 16 x half> %vb)
  ret <vscale x 16 x half> %vd
}

; fma operands: (%va, %splat, %vb).
define <vscale x 16 x half> @vfmadd_vf_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv16f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFH-NEXT:    vfmadd.vf v8, fa0, v12
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv16f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 2
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
; ZVFHMIN-NEXT:    vmv4r.v v28, v12
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vs4r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v16, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v24, v16
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v28
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl4r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v16, v0, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 2
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 16 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x half> @llvm.fma.v16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %splat, <vscale x 16 x half> %vb)
  ret <vscale x 16 x half> %vd
}

declare <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half>, <vscale x 32 x half>, <vscale x 32 x half>)

; fma operands: (%vc, %vb, %va).
define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %vc) {
; ZVFH-LABEL: vfmadd_vv_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vl8re16.v v24, (a0)
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfmacc.vv v8, v16, v24
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vv_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    li a2, 40
; ZVFHMIN-NEXT:    mul a1, a1, a2
; ZVFHMIN-NEXT:    sub sp, sp, a1
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    slli a1, a1, 3
; ZVFHMIN-NEXT:    add a1, sp, a1
; ZVFHMIN-NEXT:    addi a1, a1, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vmv8r.v v0, v8
; ZVFHMIN-NEXT:    csrr a1, vlenb
; ZVFHMIN-NEXT:    li a2, 24
; ZVFHMIN-NEXT:    mul a1, a1, a2
; ZVFHMIN-NEXT:    add a1, sp, a1
; ZVFHMIN-NEXT:    addi a1, a1, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vl8re16.v v24, (a0)
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v24
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfmadd.vv v16, v8, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v0, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 24
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v16, v12
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v12
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 5
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v4, v8
; ZVFHMIN-NEXT:    vmv8r.v v8, v0
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    li a1, 40
; ZVFHMIN-NEXT:    mul a0, a0, a1
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    ret
  %vd = call <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half> %vc, <vscale x 32 x half> %vb, <vscale x 32 x half> %va)
  ret <vscale x 32 x half> %vd
}

; fma operands: (%vb, %splat, %va).
define <vscale x 32 x half> @vfmadd_vf_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, half %c) {
; ZVFH-LABEL: vfmadd_vf_nxv32f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT:    vfmacc.vf v8, fa0, v16
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: vfmadd_vf_nxv32f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    addi sp, sp, -16
; ZVFHMIN-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    sub sp, sp, a0
; ZVFHMIN-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v24, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v0, v24
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v24, v8
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v0
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v16
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 3
; ZVFHMIN-NEXT:    add a0, sp, a0
; ZVFHMIN-NEXT:    addi a0, a0, 16
; ZVFHMIN-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfmadd.vv v0, v8, v24
; ZVFHMIN-NEXT:    vmv8r.v v24, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v16, v0
; ZVFHMIN-NEXT:    addi a0, sp, 16
; ZVFHMIN-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v0, v12
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v20
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v24, v0
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v20, v8
; ZVFHMIN-NEXT:    vmv8r.v v8, v16
; ZVFHMIN-NEXT:    csrr a0, vlenb
; ZVFHMIN-NEXT:    slli a0, a0, 4
; ZVFHMIN-NEXT:    add sp, sp, a0
; ZVFHMIN-NEXT:    addi sp, sp, 16
; ZVFHMIN-NEXT:    ret
  %head = insertelement <vscale x 32 x half> poison, half %c, i32 0
  %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
  %vd = call <vscale x 32 x half> @llvm.fma.v32f16(<vscale x 32 x half> %vb, <vscale x 32 x half> %splat, <vscale x 32 x half> %va)
  ret <vscale x 32 x half> %vd
}

declare <vscale x 1 x float> @llvm.fma.v1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>)

; fma operands: (%va, %vb, %vc).
define <vscale x 1 x float> @vfmadd_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x float> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x float> @llvm.fma.v1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x float> %vc)
  ret <vscale x 1 x float> %vd
}

; fma operands: (%va, %splat, %vb).
define <vscale x 1 x float> @vfmadd_vf_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, float %c) {
; CHECK-LABEL: vfmadd_vf_nxv1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x float> @llvm.fma.v1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %splat, <vscale x 1 x float> %vb)
  ret <vscale x 1 x float> %vd
}

declare <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)

; fma operands: (%va, %vc, %vb).
define <vscale x 2 x float> @vfmadd_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x float> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v9
; CHECK-NEXT:    ret
  %vd = call <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vc, <vscale x 2 x float> %vb)
  ret <vscale x 2 x float> %vd
}

; fma operands: (%vb, %splat, %va).
define <vscale x 2 x float> @vfmadd_vf_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, float %c) {
; CHECK-LABEL: vfmadd_vf_nxv2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 2 x float> %head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x float> @llvm.fma.v2f32(<vscale x 2 x float> %vb, <vscale x 2 x float> %splat, <vscale x 2 x float> %va)
  ret <vscale x 2 x float> %vd
}

declare <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)

; fma operands: (%vb, %va, %vc).
define <vscale x 4 x float> @vfmadd_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x float> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v10, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float> %vb, <vscale x 4 x float> %va, <vscale x 4 x float> %vc)
  ret <vscale x 4 x float> %vd
}

; fma operands: (%va, %splat, %vb).
define <vscale x 4 x float> @vfmadd_vf_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, float %c) {
; CHECK-LABEL: vfmadd_vf_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x float> @llvm.fma.v4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %splat, <vscale x 4 x float> %vb)
  ret <vscale x 4 x float> %vd
}

declare <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float>, <vscale x 8 x float>, <vscale x 8 x float>)

; fma operands: (%vb, %vc, %va).
define <vscale x 8 x float> @vfmadd_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x float> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmacc.vv v8, v16, v12
; CHECK-NEXT:    ret
  %vd = call <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float> %vb, <vscale x 8 x float> %vc, <vscale x 8 x float> %va)
  ret <vscale x 8 x float> %vd
}

; fma operands: (%vb, %splat, %va).
define <vscale x 8 x float> @vfmadd_vf_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, float %c) {
; CHECK-LABEL: vfmadd_vf_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x float> @llvm.fma.v8f32(<vscale x 8 x float> %vb, <vscale x 8 x float> %splat, <vscale x 8 x float> %va)
  ret <vscale x 8 x float> %vd
}

declare <vscale x 16 x float> @llvm.fma.v16f32(<vscale x 16 x float>, <vscale x 16 x float>, <vscale x 16 x float>)

; fma operands: (%vc, %va, %vb).
define <vscale x 16 x float> @vfmadd_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x float> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re32.v v24, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v24, v16
; CHECK-NEXT:    ret
  %vd = call <vscale x 16 x float> @llvm.fma.v16f32(<vscale x 16 x float> %vc, <vscale x 16 x float> %va, <vscale x 16 x float> %vb)
  ret <vscale x 16 x float> %vd
}

; fma operands: (%va, %splat, %vb).
define <vscale x 16 x float> @vfmadd_vf_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, float %c) {
; CHECK-LABEL: vfmadd_vf_nxv16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v16
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 16 x float> poison, float %c, i32 0
  %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
  %vd = call <vscale x 16 x float> @llvm.fma.v16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %splat, <vscale x 16 x float> %vb)
  ret <vscale x 16 x float> %vd
}

declare <vscale x 1 x double> @llvm.fma.v1f64(<vscale x 1 x double>, <vscale x 1 x double>, <vscale x 1 x double>)

; fma operands: (%va, %vb, %vc).
define <vscale x 1 x double> @vfmadd_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x double> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %vd = call <vscale x 1 x double> @llvm.fma.v1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x double> %vc)
  ret <vscale x 1 x double> %vd
}

; fma operands: (%va, %splat, %vb).
define <vscale x 1 x double> @vfmadd_vf_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, double %c) {
; CHECK-LABEL: vfmadd_vf_nxv1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 1 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
  %vd = call <vscale x 1 x double> @llvm.fma.v1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %splat, <vscale x 1 x double> %vb)
  ret <vscale x 1 x double> %vd
}

declare <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

; fma operands: (%va, %vc, %vb).
define <vscale x 2 x double> @vfmadd_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x double> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v10
; CHECK-NEXT:    ret
  %vd = call <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vc, <vscale x 2 x double> %vb)
  ret <vscale x 2 x double> %vd
}

; fma operands: (%vb, %splat, %va).
define <vscale x 2 x double> @vfmadd_vf_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, double %c) {
; CHECK-LABEL: vfmadd_vf_nxv2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 2 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %vd = call <vscale x 2 x double> @llvm.fma.v2f64(<vscale x 2 x double> %vb, <vscale x 2 x double> %splat, <vscale x 2 x double> %va)
  ret <vscale x 2 x double> %vd
}

declare <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double>, <vscale x 4 x double>, <vscale x 4 x double>)

; fma operands: (%vb, %va, %vc).
define <vscale x 4 x double> @vfmadd_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x double> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vv v8, v12, v16
; CHECK-NEXT:    ret
  %vd = call <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double> %vb, <vscale x 4 x double> %va, <vscale x 4 x double> %vc)
  ret <vscale x 4 x double> %vd
}

; fma operands: (%va, %splat, %vb).
define <vscale x 4 x double> @vfmadd_vf_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, double %c) {
; CHECK-LABEL: vfmadd_vf_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 4 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
  %vd = call <vscale x 4 x double> @llvm.fma.v4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %splat, <vscale x 4 x double> %vb)
  ret <vscale x 4 x double> %vd
}

declare <vscale x 8 x double> @llvm.fma.v8f64(<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>)

; fma operands: (%vb, %vc, %va).
define <vscale x 8 x double> @vfmadd_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x double> %vc) {
; CHECK-LABEL: vfmadd_vv_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl8re64.v v24, (a0)
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmacc.vv v8, v16, v24
; CHECK-NEXT:    ret
  %vd = call <vscale x 8 x double> @llvm.fma.v8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %vc, <vscale x 8 x double> %va)
  ret <vscale x 8 x double> %vd
}

; fma operands: (%vb, %splat, %va).
define <vscale x 8 x double> @vfmadd_vf_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, double %c) {
; CHECK-LABEL: vfmadd_vf_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT:    vfmacc.vf v8, fa0, v16
; CHECK-NEXT:    ret
  %head = insertelement <vscale x 8 x double> poison, double %c, i32 0
  %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
  %vd = call <vscale x 8 x double> @llvm.fma.v8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %splat, <vscale x 8 x double> %va)
  ret <vscale x 8 x double> %vd
}