1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
3 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA
4 ; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
5 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN
; Vector-vector case, 1 element: both bfloat operands are extended to float
; with fpext and passed to llvm.fma with %a as the addend.
; With +zvfbfwma the checks expect a single vfwmaccbf16.vv; with only
; +zvfbfmin they expect two vfwcvtbf16.f.f.v widenings followed by an e32
; vfmacc.vv.
7 define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x bfloat> %c) {
8 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v1f32:
10 ; ZVFBFWMA-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
11 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v9, v10
14 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v1f32:
16 ; ZVFBFMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
17 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v9
18 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v10
19 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
20 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v9
22 %b.ext = fpext <1 x bfloat> %b to <1 x float>
23 %c.ext = fpext <1 x bfloat> %c to <1 x float>
24 %res = call <1 x float> @llvm.fma.v1f32(<1 x float> %b.ext, <1 x float> %c.ext, <1 x float> %a)
; Vector-scalar case, 1 element: the scalar bfloat %b is placed into a vector
; (insertelement + zero-mask shufflevector) before both operands are extended
; and fed to llvm.fma with %a as the addend.
; The expected code moves the scalar to a GPR (fmv.x.h with +zvfbfwma,
; fmv.x.w with +zvfbfmin) and inserts it with vmv.s.x. Note the +zvfbfwma
; checks still expect the .vv form of vfwmaccbf16, not a .vf form.
28 define <1 x float> @vfwmaccbf16_vf_v1f32(<1 x float> %a, bfloat %b, <1 x bfloat> %c) {
29 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v1f32:
31 ; ZVFBFWMA-NEXT: fmv.x.h a0, fa0
32 ; ZVFBFWMA-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
33 ; ZVFBFWMA-NEXT: vmv.s.x v10, a0
34 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v10, v9
37 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v1f32:
39 ; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
40 ; ZVFBFMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
41 ; ZVFBFMIN-NEXT: vmv.s.x v10, a0
42 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v10
43 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
44 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
45 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v10
47 %b.head = insertelement <1 x bfloat> poison, bfloat %b, i32 0
48 %b.splat = shufflevector <1 x bfloat> %b.head, <1 x bfloat> poison, <1 x i32> zeroinitializer
49 %b.ext = fpext <1 x bfloat> %b.splat to <1 x float>
50 %c.ext = fpext <1 x bfloat> %c to <1 x float>
51 %res = call <1 x float> @llvm.fma.v1f32(<1 x float> %b.ext, <1 x float> %c.ext, <1 x float> %a)
; Vector-vector case, 2 elements: fpext of both bfloat operands feeding
; llvm.fma with %a as the addend.
; Expected: one vfwmaccbf16.vv at e16/mf4 with +zvfbfwma; with +zvfbfmin,
; two vfwcvtbf16.f.f.v widenings and then vfmacc.vv at e32/mf2.
55 define <2 x float> @vfwmaccbf16_vv_v2f32(<2 x float> %a, <2 x bfloat> %b, <2 x bfloat> %c) {
56 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v2f32:
58 ; ZVFBFWMA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
59 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v9, v10
62 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v2f32:
64 ; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
65 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v9
66 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v10
67 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
68 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v9
70 %b.ext = fpext <2 x bfloat> %b to <2 x float>
71 %c.ext = fpext <2 x bfloat> %c to <2 x float>
72 %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %b.ext, <2 x float> %c.ext, <2 x float> %a)
; Vector-scalar case, 2 elements: scalar bfloat %b is splatted
; (insertelement + zero-mask shufflevector), then both operands are extended
; and fed to llvm.fma with %a as the addend.
; Expected: the scalar is moved to a GPR (fmv.x.h / fmv.x.w) and broadcast
; with vmv.v.x; the +zvfbfwma checks use vfwmaccbf16.vv on the splat.
76 define <2 x float> @vfwmaccbf16_vf_v2f32(<2 x float> %a, bfloat %b, <2 x bfloat> %c) {
77 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v2f32:
79 ; ZVFBFWMA-NEXT: fmv.x.h a0, fa0
80 ; ZVFBFWMA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
81 ; ZVFBFWMA-NEXT: vmv.v.x v10, a0
82 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v10, v9
85 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v2f32:
87 ; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
88 ; ZVFBFMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
89 ; ZVFBFMIN-NEXT: vmv.v.x v10, a0
90 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v10
91 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
92 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
93 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v10
95 %b.head = insertelement <2 x bfloat> poison, bfloat %b, i32 0
96 %b.splat = shufflevector <2 x bfloat> %b.head, <2 x bfloat> poison, <2 x i32> zeroinitializer
97 %b.ext = fpext <2 x bfloat> %b.splat to <2 x float>
98 %c.ext = fpext <2 x bfloat> %c to <2 x float>
99 %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %b.ext, <2 x float> %c.ext, <2 x float> %a)
; Vector-vector case, 4 elements: fpext of both bfloat operands feeding
; llvm.fma with %a as the addend.
; Expected: one vfwmaccbf16.vv at e16/mf2 with +zvfbfwma; with +zvfbfmin,
; two vfwcvtbf16.f.f.v widenings and then vfmacc.vv at e32/m1.
103 define <4 x float> @vfwmaccbf16_vv_v4f32(<4 x float> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
104 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v4f32:
106 ; ZVFBFWMA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
107 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v9, v10
110 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v4f32:
112 ; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
113 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v9
114 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v10
115 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
116 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v9
118 %b.ext = fpext <4 x bfloat> %b to <4 x float>
119 %c.ext = fpext <4 x bfloat> %c to <4 x float>
120 %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %b.ext, <4 x float> %c.ext, <4 x float> %a)
; Vector-scalar case, 4 elements: scalar bfloat %b is splatted
; (insertelement + zero-mask shufflevector), then both operands are extended
; and fed to llvm.fma with %a as the addend.
; Expected: the scalar is moved to a GPR (fmv.x.h / fmv.x.w) and broadcast
; with vmv.v.x; the +zvfbfwma checks use vfwmaccbf16.vv on the splat.
124 define <4 x float> @vfwmaccbf16_vf_v4f32(<4 x float> %a, bfloat %b, <4 x bfloat> %c) {
125 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v4f32:
127 ; ZVFBFWMA-NEXT: fmv.x.h a0, fa0
128 ; ZVFBFWMA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
129 ; ZVFBFWMA-NEXT: vmv.v.x v10, a0
130 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v10, v9
133 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v4f32:
135 ; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
136 ; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
137 ; ZVFBFMIN-NEXT: vmv.v.x v10, a0
138 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v11, v10
139 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9
140 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
141 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v10
143 %b.head = insertelement <4 x bfloat> poison, bfloat %b, i32 0
144 %b.splat = shufflevector <4 x bfloat> %b.head, <4 x bfloat> poison, <4 x i32> zeroinitializer
145 %b.ext = fpext <4 x bfloat> %b.splat to <4 x float>
146 %c.ext = fpext <4 x bfloat> %c to <4 x float>
147 %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %b.ext, <4 x float> %c.ext, <4 x float> %a)
; Vector-vector case, 8 elements: fpext of both bfloat operands feeding
; llvm.fma with %a as the addend.
; Expected: one vfwmaccbf16.vv at e16/m1 with +zvfbfwma; with +zvfbfmin,
; two vfwcvtbf16.f.f.v widenings (into v12 and v14) and then vfmacc.vv at
; e32/m2.
151 define <8 x float> @vfwmaccbf16_vv_v8f32(<8 x float> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
152 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v8f32:
154 ; ZVFBFWMA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
155 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v10, v11
158 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v8f32:
160 ; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
161 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v10
162 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v14, v11
163 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
164 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v12, v14
166 %b.ext = fpext <8 x bfloat> %b to <8 x float>
167 %c.ext = fpext <8 x bfloat> %c to <8 x float>
168 %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %b.ext, <8 x float> %c.ext, <8 x float> %a)
; Vector-scalar case, 8 elements: scalar bfloat %b is splatted
; (insertelement + zero-mask shufflevector), then both operands are extended
; and fed to llvm.fma with %a as the addend.
; Expected: the scalar is moved to a GPR (fmv.x.h / fmv.x.w) and broadcast
; with vmv.v.x into v11; the +zvfbfwma checks use vfwmaccbf16.vv on the splat.
172 define <8 x float> @vfwmaccbf16_vf_v8f32(<8 x float> %a, bfloat %b, <8 x bfloat> %c) {
173 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v8f32:
175 ; ZVFBFWMA-NEXT: fmv.x.h a0, fa0
176 ; ZVFBFWMA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
177 ; ZVFBFWMA-NEXT: vmv.v.x v11, a0
178 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v11, v10
181 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v8f32:
183 ; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
184 ; ZVFBFMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
185 ; ZVFBFMIN-NEXT: vmv.v.x v11, a0
186 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v12, v11
187 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v14, v10
188 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
189 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v12, v14
191 %b.head = insertelement <8 x bfloat> poison, bfloat %b, i32 0
192 %b.splat = shufflevector <8 x bfloat> %b.head, <8 x bfloat> poison, <8 x i32> zeroinitializer
193 %b.ext = fpext <8 x bfloat> %b.splat to <8 x float>
194 %c.ext = fpext <8 x bfloat> %c to <8 x float>
195 %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %b.ext, <8 x float> %c.ext, <8 x float> %a)
; Vector-vector case, 16 elements: fpext of both bfloat operands feeding
; llvm.fma with %a as the addend; the fma result is returned.
; Expected: one vfwmaccbf16.vv at e16/m2 with +zvfbfwma; with +zvfbfmin,
; two vfwcvtbf16.f.f.v widenings (into v16 and v20) and then vfmacc.vv at
; e32/m4.
199 define <16 x float> @vfwmaccbf16_vv_v16f32(<16 x float> %a, <16 x bfloat> %b, <16 x bfloat> %c) {
200 ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v16f32:
202 ; ZVFBFWMA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
203 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v12, v14
206 ; ZVFBFMIN-LABEL: vfwmaccbf16_vv_v16f32:
208 ; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
209 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v12
210 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v20, v14
211 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
212 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v16, v20
214 %b.ext = fpext <16 x bfloat> %b to <16 x float>
215 %c.ext = fpext <16 x bfloat> %c to <16 x float>
216 %res = call <16 x float> @llvm.fma.v16f32(<16 x float> %b.ext, <16 x float> %c.ext, <16 x float> %a)
217 ret <16 x float> %res
; Vector-scalar case, 16 elements: scalar bfloat %b is splatted
; (insertelement + zero-mask shufflevector), then both operands are extended
; and fed to llvm.fma with %a as the addend; the fma result is returned.
; Expected: the scalar is moved to a GPR (fmv.x.h / fmv.x.w) and broadcast
; with vmv.v.x into v14; the +zvfbfwma checks use vfwmaccbf16.vv on the splat.
220 define <16 x float> @vfwmaccbf16_vf_v16f32(<16 x float> %a, bfloat %b, <16 x bfloat> %c) {
221 ; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v16f32:
223 ; ZVFBFWMA-NEXT: fmv.x.h a0, fa0
224 ; ZVFBFWMA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
225 ; ZVFBFWMA-NEXT: vmv.v.x v14, a0
226 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v14, v12
229 ; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v16f32:
231 ; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
232 ; ZVFBFMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
233 ; ZVFBFMIN-NEXT: vmv.v.x v14, a0
234 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v16, v14
235 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v20, v12
236 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
237 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v16, v20
239 %b.head = insertelement <16 x bfloat> poison, bfloat %b, i32 0
240 %b.splat = shufflevector <16 x bfloat> %b.head, <16 x bfloat> poison, <16 x i32> zeroinitializer
241 %b.ext = fpext <16 x bfloat> %b.splat to <16 x float>
242 %c.ext = fpext <16 x bfloat> %c to <16 x float>
243 %res = call <16 x float> @llvm.fma.v16f32(<16 x float> %b.ext, <16 x float> %c.ext, <16 x float> %a)
244 ret <16 x float> %res
; Vector-vector case, 32 elements: fpext of both bfloat operands feeding
; llvm.fma with %a as the addend; the fma result is returned.
; Unlike the smaller tests, the expected output loads the element count with
; `li a0, 32` and uses vsetvli instead of vsetivli (presumably because 32
; exceeds the vsetivli immediate range - confirm against the RVV spec).
; Expected: one vfwmaccbf16.vv at e16/m4 with +zvfbfwma; with +zvfbfmin,
; two vfwcvtbf16.f.f.v widenings (into v24 and v0) and then vfmacc.vv at
; e32/m8.
; NOTE(review): the function name says "bf32" while every other test in this
; file uses "vfwmaccbf16_"; this looks like a typo. Renaming would also
; require regenerating the LABEL checks, so it is left unchanged here.
247 define <32 x float> @vfwmaccbf32_vv_v32f32(<32 x float> %a, <32 x bfloat> %b, <32 x bfloat> %c) {
248 ; ZVFBFWMA-LABEL: vfwmaccbf32_vv_v32f32:
250 ; ZVFBFWMA-NEXT: li a0, 32
251 ; ZVFBFWMA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
252 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v16, v20
255 ; ZVFBFMIN-LABEL: vfwmaccbf32_vv_v32f32:
257 ; ZVFBFMIN-NEXT: li a0, 32
258 ; ZVFBFMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
259 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v16
260 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v20
261 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
262 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v24, v0
264 %b.ext = fpext <32 x bfloat> %b to <32 x float>
265 %c.ext = fpext <32 x bfloat> %c to <32 x float>
266 %res = call <32 x float> @llvm.fma.v32f32(<32 x float> %b.ext, <32 x float> %c.ext, <32 x float> %a)
267 ret <32 x float> %res
; Vector-scalar case, 32 elements: scalar bfloat %b is splatted
; (insertelement + zero-mask shufflevector), then both operands are extended
; and fed to llvm.fma with %a as the addend; the fma result is returned.
; Expected: the scalar is moved to a GPR (fmv.x.h / fmv.x.w), the element
; count is loaded with `li a1, 32` for vsetvli (presumably because 32 exceeds
; the vsetivli immediate range - confirm against the RVV spec), and the
; scalar is broadcast with vmv.v.x into v20. The +zvfbfwma checks use
; vfwmaccbf16.vv on the splat.
; NOTE(review): the function name says "bf32" while every other test in this
; file uses "vfwmaccbf16_"; this looks like a typo. Renaming would also
; require regenerating the LABEL checks, so it is left unchanged here.
270 define <32 x float> @vfwmaccbf32_vf_v32f32(<32 x float> %a, bfloat %b, <32 x bfloat> %c) {
271 ; ZVFBFWMA-LABEL: vfwmaccbf32_vf_v32f32:
273 ; ZVFBFWMA-NEXT: fmv.x.h a0, fa0
274 ; ZVFBFWMA-NEXT: li a1, 32
275 ; ZVFBFWMA-NEXT: vsetvli zero, a1, e16, m4, ta, ma
276 ; ZVFBFWMA-NEXT: vmv.v.x v20, a0
277 ; ZVFBFWMA-NEXT: vfwmaccbf16.vv v8, v20, v16
280 ; ZVFBFMIN-LABEL: vfwmaccbf32_vf_v32f32:
282 ; ZVFBFMIN-NEXT: fmv.x.w a0, fa0
283 ; ZVFBFMIN-NEXT: li a1, 32
284 ; ZVFBFMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma
285 ; ZVFBFMIN-NEXT: vmv.v.x v20, a0
286 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v24, v20
287 ; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v0, v16
288 ; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
289 ; ZVFBFMIN-NEXT: vfmacc.vv v8, v24, v0
291 %b.head = insertelement <32 x bfloat> poison, bfloat %b, i32 0
292 %b.splat = shufflevector <32 x bfloat> %b.head, <32 x bfloat> poison, <32 x i32> zeroinitializer
293 %b.ext = fpext <32 x bfloat> %b.splat to <32 x float>
294 %c.ext = fpext <32 x bfloat> %c to <32 x float>
295 %res = call <32 x float> @llvm.fma.v32f32(<32 x float> %b.ext, <32 x float> %c.ext, <32 x float> %a)
296 ret <32 x float> %res