1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
3 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
4 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
5 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
; Vector-vector case with an nxv1f32 accumulator: both bf16 vector operands are
; fpext'd to f32 and passed to llvm.fma as the multiplicands, with %a as the
; addend.
; Under the +zvfbfwma run lines the checks expect a single vfwmaccbf16.vv at
; e16/mf4; under the +zvfbfmin run lines they expect two vfwcvtbf16.f.f.v
; widenings followed by vfmacc.vv at e32/mf2.
7 define <vscale x 1 x float> @vfwmaccbf16_vv_nxv1f32(<vscale x 1 x float> %a, <vscale x 1 x bfloat> %b, <vscale x 1 x bfloat> %c) {
8 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv1f32:
10 ; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
11 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v9, v10
14 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv1f32:
16 ; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
17 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v9
18 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v10
19 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
20 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v9
; fpext of both multiplicands followed by fma is the pattern under test.
22 %b.ext = fpext <vscale x 1 x bfloat> %b to <vscale x 1 x float>
23 %c.ext = fpext <vscale x 1 x bfloat> %c to <vscale x 1 x float>
; Computes %b.ext * %c.ext + %a.
24 %res = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> %b.ext, <vscale x 1 x float> %c.ext, <vscale x 1 x float> %a)
25 ret <vscale x 1 x float> %res
; Vector-scalar case with an nxv1f32 accumulator: the scalar bfloat %b is
; splatted (insertelement + zero-mask shufflevector), fpext'd to f32, and
; multiplied with the fpext'd vector %c, with %a as the fma addend.
; Under the +zvfbfwma run lines the checks expect a single vfwmaccbf16.vf
; taking fa0 directly. Under the +zvfbfmin run lines they expect the scalar to
; go through a0 with a left shift by 16 (fmv.x.w / slli / fmv.w.x) into fa5 --
; presumably widening the bf16 bit pattern to f32 -- then one vfwcvtbf16.f.f.v
; for %c and a vfmacc.vf at e32/mf2.
28 define <vscale x 1 x float> @vfwmaccbf16_vf_nxv1f32(<vscale x 1 x float> %a, bfloat %b, <vscale x 1 x bfloat> %c) {
29 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv1f32:
31 ; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
32 ; ZVFBFWMA-NEXT: vfwmaccbf16.vf v8, fa0, v9
35 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv1f32:
37 ; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
38 ; ZVFBFMIN-NEXT: slli a0, a0, 16
39 ; ZVFBFMIN-NEXT: fmv.w.x fa5, a0
40 ; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
41 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
42 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
43 ; ZVFBFMIN-NEXT: vfmacc.vf v8, fa5, v10
; Build a splat of %b: insert into lane 0, then broadcast with an all-zero mask.
45 %b.head = insertelement <vscale x 1 x bfloat> poison, bfloat %b, i32 0
46 %b.splat = shufflevector <vscale x 1 x bfloat> %b.head, <vscale x 1 x bfloat> poison, <vscale x 1 x i32> zeroinitializer
; Widen both multiplicands to f32 before the fma.
47 %b.ext = fpext <vscale x 1 x bfloat> %b.splat to <vscale x 1 x float>
48 %c.ext = fpext <vscale x 1 x bfloat> %c to <vscale x 1 x float>
; Computes %b.ext * %c.ext + %a.
49 %res = call <vscale x 1 x float> @llvm.fma.nxv1f32(<vscale x 1 x float> %b.ext, <vscale x 1 x float> %c.ext, <vscale x 1 x float> %a)
50 ret <vscale x 1 x float> %res
; Vector-vector case with an nxv2f32 accumulator: both bf16 vector operands are
; fpext'd to f32 and passed to llvm.fma as the multiplicands, with %a as the
; addend.
; Under the +zvfbfwma run lines the checks expect a single vfwmaccbf16.vv at
; e16/mf2; under the +zvfbfmin run lines they expect two vfwcvtbf16.f.f.v
; widenings followed by vfmacc.vv at e32/m1.
53 define <vscale x 2 x float> @vfwmaccbf16_vv_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x bfloat> %b, <vscale x 2 x bfloat> %c) {
54 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv2f32:
56 ; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
57 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v9, v10
60 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv2f32:
62 ; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
63 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v9
64 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v10
65 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
66 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v9
; fpext of both multiplicands followed by fma is the pattern under test.
68 %b.ext = fpext <vscale x 2 x bfloat> %b to <vscale x 2 x float>
69 %c.ext = fpext <vscale x 2 x bfloat> %c to <vscale x 2 x float>
; Computes %b.ext * %c.ext + %a.
70 %res = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %b.ext, <vscale x 2 x float> %c.ext, <vscale x 2 x float> %a)
71 ret <vscale x 2 x float> %res
; Vector-scalar case with an nxv2f32 accumulator: the scalar bfloat %b is
; splatted (insertelement + zero-mask shufflevector), fpext'd to f32, and
; multiplied with the fpext'd vector %c, with %a as the fma addend.
; Under the +zvfbfwma run lines the checks expect a single vfwmaccbf16.vf
; taking fa0 directly. Under the +zvfbfmin run lines they expect the scalar to
; go through a0 with a left shift by 16 (fmv.x.w / slli / fmv.w.x) into fa5 --
; presumably widening the bf16 bit pattern to f32 -- then one vfwcvtbf16.f.f.v
; for %c and a vfmacc.vf at e32/m1.
74 define <vscale x 2 x float> @vfwmaccbf16_vf_nxv2f32(<vscale x 2 x float> %a, bfloat %b, <vscale x 2 x bfloat> %c) {
75 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv2f32:
77 ; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
78 ; ZVFBFWMA-NEXT: vfwmaccbf16.vf v8, fa0, v9
81 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv2f32:
83 ; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
84 ; ZVFBFMIN-NEXT: slli a0, a0, 16
85 ; ZVFBFMIN-NEXT: fmv.w.x fa5, a0
86 ; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
87 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
88 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
89 ; ZVFBFMIN-NEXT: vfmacc.vf v8, fa5, v10
; Build a splat of %b: insert into lane 0, then broadcast with an all-zero mask.
91 %b.head = insertelement <vscale x 2 x bfloat> poison, bfloat %b, i32 0
92 %b.splat = shufflevector <vscale x 2 x bfloat> %b.head, <vscale x 2 x bfloat> poison, <vscale x 2 x i32> zeroinitializer
; Widen both multiplicands to f32 before the fma.
93 %b.ext = fpext <vscale x 2 x bfloat> %b.splat to <vscale x 2 x float>
94 %c.ext = fpext <vscale x 2 x bfloat> %c to <vscale x 2 x float>
; Computes %b.ext * %c.ext + %a.
95 %res = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %b.ext, <vscale x 2 x float> %c.ext, <vscale x 2 x float> %a)
96 ret <vscale x 2 x float> %res
; Vector-vector case with an nxv4f32 accumulator: both bf16 vector operands are
; fpext'd to f32 and passed to llvm.fma as the multiplicands, with %a as the
; addend.
; Under the +zvfbfwma run lines the checks expect a single vfwmaccbf16.vv at
; e16/m1; under the +zvfbfmin run lines they expect two vfwcvtbf16.f.f.v
; widenings followed by vfmacc.vv at e32/m2.
99 define <vscale x 4 x float> @vfwmaccbf16_vv_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x bfloat> %b, <vscale x 4 x bfloat> %c) {
100 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv4f32:
102 ; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
103 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v10, v11
106 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv4f32:
108 ; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
109 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10
110 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v14, v11
111 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
112 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v12, v14
; fpext of both multiplicands followed by fma is the pattern under test.
114 %b.ext = fpext <vscale x 4 x bfloat> %b to <vscale x 4 x float>
115 %c.ext = fpext <vscale x 4 x bfloat> %c to <vscale x 4 x float>
; Computes %b.ext * %c.ext + %a.
116 %res = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %b.ext, <vscale x 4 x float> %c.ext, <vscale x 4 x float> %a)
117 ret <vscale x 4 x float> %res
; Vector-scalar case with an nxv4f32 accumulator: the scalar bfloat %b is
; splatted (insertelement + zero-mask shufflevector), fpext'd to f32, and
; multiplied with the fpext'd vector %c, with %a as the fma addend.
; Under the +zvfbfwma run lines the checks expect a single vfwmaccbf16.vf
; taking fa0 directly. Under the +zvfbfmin run lines they expect the scalar to
; go through a0 with a left shift by 16 (fmv.x.w / slli / fmv.w.x) into fa5 --
; presumably widening the bf16 bit pattern to f32 -- then one vfwcvtbf16.f.f.v
; for %c and a vfmacc.vf at e32/m2.
120 define <vscale x 4 x float> @vfwmaccbf16_vf_nxv4f32(<vscale x 4 x float> %a, bfloat %b, <vscale x 4 x bfloat> %c) {
121 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv4f32:
123 ; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m1, ta, ma
124 ; ZVFBFWMA-NEXT: vfwmaccbf16.vf v8, fa0, v10
127 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv4f32:
129 ; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
130 ; ZVFBFMIN-NEXT: slli a0, a0, 16
131 ; ZVFBFMIN-NEXT: fmv.w.x fa5, a0
132 ; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma
133 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10
134 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
135 ; ZVFBFMIN-NEXT: vfmacc.vf v8, fa5, v12
; Build a splat of %b: insert into lane 0, then broadcast with an all-zero mask.
137 %b.head = insertelement <vscale x 4 x bfloat> poison, bfloat %b, i32 0
138 %b.splat = shufflevector <vscale x 4 x bfloat> %b.head, <vscale x 4 x bfloat> poison, <vscale x 4 x i32> zeroinitializer
; Widen both multiplicands to f32 before the fma.
139 %b.ext = fpext <vscale x 4 x bfloat> %b.splat to <vscale x 4 x float>
140 %c.ext = fpext <vscale x 4 x bfloat> %c to <vscale x 4 x float>
; Computes %b.ext * %c.ext + %a.
141 %res = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %b.ext, <vscale x 4 x float> %c.ext, <vscale x 4 x float> %a)
142 ret <vscale x 4 x float> %res
; Vector-vector case with an nxv8f32 accumulator: both bf16 vector operands are
; fpext'd to f32 and passed to llvm.fma as the multiplicands, with %a as the
; addend.
; Under the +zvfbfwma run lines the checks expect a single vfwmaccbf16.vv at
; e16/m2; under the +zvfbfmin run lines they expect two vfwcvtbf16.f.f.v
; widenings followed by vfmacc.vv at e32/m4.
145 define <vscale x 8 x float> @vfwmaccbf16_vv_nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) {
146 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv8f32:
148 ; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
149 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v12, v14
152 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv8f32:
154 ; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
155 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
156 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v20, v14
157 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
158 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v16, v20
; fpext of both multiplicands followed by fma is the pattern under test.
160 %b.ext = fpext <vscale x 8 x bfloat> %b to <vscale x 8 x float>
161 %c.ext = fpext <vscale x 8 x bfloat> %c to <vscale x 8 x float>
; Computes %b.ext * %c.ext + %a.
162 %res = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> %b.ext, <vscale x 8 x float> %c.ext, <vscale x 8 x float> %a)
163 ret <vscale x 8 x float> %res
; Vector-scalar case with an nxv8f32 accumulator: the scalar bfloat %b is
; splatted (insertelement + zero-mask shufflevector), fpext'd to f32, and
; multiplied with the fpext'd vector %c, with %a as the fma addend.
; Under the +zvfbfwma run lines the checks expect a single vfwmaccbf16.vf
; taking fa0 directly. Under the +zvfbfmin run lines they expect the scalar to
; go through a0 with a left shift by 16 (fmv.x.w / slli / fmv.w.x) into fa5 --
; presumably widening the bf16 bit pattern to f32 -- then one vfwcvtbf16.f.f.v
; for %c and a vfmacc.vf at e32/m4.
166 define <vscale x 8 x float> @vfwmaccbf16_vf_nxv8f32(<vscale x 8 x float> %a, bfloat %b, <vscale x 8 x bfloat> %c) {
167 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv8f32:
169 ; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
170 ; ZVFBFWMA-NEXT: vfwmaccbf16.vf v8, fa0, v12
173 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv8f32:
175 ; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
176 ; ZVFBFMIN-NEXT: slli a0, a0, 16
177 ; ZVFBFMIN-NEXT: fmv.w.x fa5, a0
178 ; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma
179 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
180 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
181 ; ZVFBFMIN-NEXT: vfmacc.vf v8, fa5, v16
; Build a splat of %b: insert into lane 0, then broadcast with an all-zero mask.
183 %b.head = insertelement <vscale x 8 x bfloat> poison, bfloat %b, i32 0
184 %b.splat = shufflevector <vscale x 8 x bfloat> %b.head, <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
; Widen both multiplicands to f32 before the fma.
185 %b.ext = fpext <vscale x 8 x bfloat> %b.splat to <vscale x 8 x float>
186 %c.ext = fpext <vscale x 8 x bfloat> %c to <vscale x 8 x float>
; Computes %b.ext * %c.ext + %a.
187 %res = call <vscale x 8 x float> @llvm.fma.nxv8f32(<vscale x 8 x float> %b.ext, <vscale x 8 x float> %c.ext, <vscale x 8 x float> %a)
188 ret <vscale x 8 x float> %res
; Vector-vector case with an nxv16f32 accumulator: both bf16 vector operands
; are fpext'd to f32 and passed to llvm.fma as the multiplicands, with %a as
; the addend.
; Under the +zvfbfwma run lines the checks expect a single vfwmaccbf16.vv at
; e16/m4; under the +zvfbfmin run lines they expect two vfwcvtbf16.f.f.v
; widenings (into v24 and v0) followed by vfmacc.vv at e32/m8.
191 define <vscale x 16 x float> @vfwmaccbf16_vv_nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x bfloat> %b, <vscale x 16 x bfloat> %c) {
192 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_nxv16f32:
194 ; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
195 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v16, v20
198 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_nxv16f32:
200 ; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
201 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
202 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v20
203 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
204 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v24, v0
; fpext of both multiplicands followed by fma is the pattern under test.
206 %b.ext = fpext <vscale x 16 x bfloat> %b to <vscale x 16 x float>
207 %c.ext = fpext <vscale x 16 x bfloat> %c to <vscale x 16 x float>
; Computes %b.ext * %c.ext + %a.
208 %res = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> %b.ext, <vscale x 16 x float> %c.ext, <vscale x 16 x float> %a)
209 ret <vscale x 16 x float> %res
; Vector-scalar case with an nxv16f32 accumulator: the scalar bfloat %b is
; splatted (insertelement + zero-mask shufflevector), fpext'd to f32, and
; multiplied with the fpext'd vector %c, with %a as the fma addend.
; Under the +zvfbfwma run lines the checks expect a single vfwmaccbf16.vf
; taking fa0 directly. Under the +zvfbfmin run lines they expect the scalar to
; go through a0 with a left shift by 16 (fmv.x.w / slli / fmv.w.x) into fa5 --
; presumably widening the bf16 bit pattern to f32 -- then one vfwcvtbf16.f.f.v
; for %c and a vfmacc.vf at e32/m8.
212 define <vscale x 16 x float> @vfwmaccbf16_vf_nxv16f32(<vscale x 16 x float> %a, bfloat %b, <vscale x 16 x bfloat> %c) {
213 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_nxv16f32:
215 ; ZVFBFWMA-NEXT: vsetvli a0, zero, e16, m4, ta, ma
216 ; ZVFBFWMA-NEXT: vfwmaccbf16.vf v8, fa0, v16
219 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_nxv16f32:
221 ; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
222 ; ZVFBFMIN-NEXT: slli a0, a0, 16
223 ; ZVFBFMIN-NEXT: fmv.w.x fa5, a0
224 ; ZVFBFMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
225 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
226 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
227 ; ZVFBFMIN-NEXT: vfmacc.vf v8, fa5, v24
; Build a splat of %b: insert into lane 0, then broadcast with an all-zero mask.
229 %b.head = insertelement <vscale x 16 x bfloat> poison, bfloat %b, i32 0
230 %b.splat = shufflevector <vscale x 16 x bfloat> %b.head, <vscale x 16 x bfloat> poison, <vscale x 16 x i32> zeroinitializer
; Widen both multiplicands to f32 before the fma.
231 %b.ext = fpext <vscale x 16 x bfloat> %b.splat to <vscale x 16 x float>
232 %c.ext = fpext <vscale x 16 x bfloat> %c to <vscale x 16 x float>
; Computes %b.ext * %c.ext + %a.
233 %res = call <vscale x 16 x float> @llvm.fma.nxv16f32(<vscale x 16 x float> %b.ext, <vscale x 16 x float> %c.ext, <vscale x 16 x float> %a)
234 ret <vscale x 16 x float> %res