; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+f,+d -target-abi=ilp32d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -target-abi=lp64d \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
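
; Make sure a fpext + fmul pair on fixed-length vectors is combined into a
; single widening multiply: vfwmul.vv for vector-vector operands and
; vfwmul.vf for a vector multiplied by a splatted scalar.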

define <2 x float> @vfwmul_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = load <2 x half>, ptr %y
  %c = fpext <2 x half> %a to <2 x float>
  %d = fpext <2 x half> %b to <2 x float>
  %e = fmul <2 x float> %c, %d
  ret <2 x float> %e
}

define <4 x float> @vfwmul_v4f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = load <4 x half>, ptr %y
  %c = fpext <4 x half> %a to <4 x float>
  %d = fpext <4 x half> %b to <4 x float>
  %e = fmul <4 x float> %c, %d
  ret <4 x float> %e
}

define <8 x float> @vfwmul_v8f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fpext <8 x half> %a to <8 x float>
  %d = fpext <8 x half> %b to <8 x float>
  %e = fmul <8 x float> %c, %d
  ret <8 x float> %e
}

define <16 x float> @vfwmul_v16f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = load <16 x half>, ptr %y
  %c = fpext <16 x half> %a to <16 x float>
  %d = fpext <16 x half> %b to <16 x float>
  %e = fmul <16 x float> %c, %d
  ret <16 x float> %e
}

define <32 x float> @vfwmul_v32f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = load <32 x half>, ptr %y
  %c = fpext <32 x half> %a to <32 x float>
  %d = fpext <32 x half> %b to <32 x float>
  %e = fmul <32 x float> %c, %d
  ret <32 x float> %e
}
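
; A <64 x half> source already fills an m8 register group and cannot be widened
; in one step (the result would need m16), so the operation is split into two
; 32-element halves with vslidedown.vx; the resulting register pressure forces
; an operand and a partial result to be spilled around the two multiplies.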

define <64 x float> @vfwmul_v64f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vfwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <64 x half>, ptr %x
  %b = load <64 x half>, ptr %y
  %c = fpext <64 x half> %a to <64 x float>
  %d = fpext <64 x half> %b to <64 x float>
  %e = fmul <64 x float> %c, %d
  ret <64 x float> %e
}

define <2 x double> @vfwmul_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %e = fmul <2 x double> %c, %d
  ret <2 x double> %e
}

define <4 x double> @vfwmul_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fpext <4 x float> %a to <4 x double>
  %d = fpext <4 x float> %b to <4 x double>
  %e = fmul <4 x double> %c, %d
  ret <4 x double> %e
}

define <8 x double> @vfwmul_v8f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = load <8 x float>, ptr %y
  %c = fpext <8 x float> %a to <8 x double>
  %d = fpext <8 x float> %b to <8 x double>
  %e = fmul <8 x double> %c, %d
  ret <8 x double> %e
}

define <16 x double> @vfwmul_v16f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vfwmul.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = load <16 x float>, ptr %y
  %c = fpext <16 x float> %a to <16 x double>
  %d = fpext <16 x float> %b to <16 x double>
  %e = fmul <16 x double> %c, %d
  ret <16 x double> %e
}
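
; As with v64f16 above, <32 x float> fills an m8 group, so the widening
; multiply is split into two 16-element halves, again spilling around the two
; vfwmul.vv instructions.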

define <32 x double> @vfwmul_v32f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwmul_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwmul.vv v24, v16, v8
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwmul.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    .cfi_def_cfa sp, 16
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = load <32 x float>, ptr %y
  %c = fpext <32 x float> %a to <32 x double>
  %d = fpext <32 x float> %b to <32 x double>
  %e = fmul <32 x double> %c, %d
  ret <32 x double> %e
}
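
; Vector-scalar forms: the scalar operand is splatted and extended in the IR,
; but the multiply is still matched to vfwmul.vf with the scalar kept in fa0.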

define <2 x float> @vfwmul_vf_v2f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = insertelement <2 x half> poison, half %y, i32 0
  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x half> %a to <2 x float>
  %e = fpext <2 x half> %c to <2 x float>
  %f = fmul <2 x float> %d, %e
  ret <2 x float> %f
}

define <4 x float> @vfwmul_vf_v4f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = insertelement <4 x half> poison, half %y, i32 0
  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x half> %a to <4 x float>
  %e = fpext <4 x half> %c to <4 x float>
  %f = fmul <4 x float> %d, %e
  ret <4 x float> %f
}

define <8 x float> @vfwmul_vf_v8f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x half> %a to <8 x float>
  %e = fpext <8 x half> %c to <8 x float>
  %f = fmul <8 x float> %d, %e
  ret <8 x float> %f
}

define <16 x float> @vfwmul_vf_v16f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = insertelement <16 x half> poison, half %y, i32 0
  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x half> %a to <16 x float>
  %e = fpext <16 x half> %c to <16 x float>
  %f = fmul <16 x float> %d, %e
  ret <16 x float> %f
}

define <32 x float> @vfwmul_vf_v32f16(ptr %x, half %y) {
; CHECK-LABEL: vfwmul_vf_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = insertelement <32 x half> poison, half %y, i32 0
  %c = shufflevector <32 x half> %b, <32 x half> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x half> %a to <32 x float>
  %e = fpext <32 x half> %c to <32 x float>
  %f = fmul <32 x float> %d, %e
  ret <32 x float> %f
}

define <2 x double> @vfwmul_vf_v2f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = insertelement <2 x float> poison, float %y, i32 0
  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x float> %a to <2 x double>
  %e = fpext <2 x float> %c to <2 x double>
  %f = fmul <2 x double> %d, %e
  ret <2 x double> %f
}

define <4 x double> @vfwmul_vf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x float> %a to <4 x double>
  %e = fpext <4 x float> %c to <4 x double>
  %f = fmul <4 x double> %d, %e
  ret <4 x double> %f
}

define <8 x double> @vfwmul_vf_v8f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x float> poison, float %y, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x float> %a to <8 x double>
  %e = fpext <8 x float> %c to <8 x double>
  %f = fmul <8 x double> %d, %e
  ret <8 x double> %f
}

define <16 x double> @vfwmul_vf_v16f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vfwmul.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = insertelement <16 x float> poison, float %y, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x float> %a to <16 x double>
  %e = fpext <16 x float> %c to <16 x double>
  %f = fmul <16 x double> %d, %e
  ret <16 x double> %f
}
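
; The split vector-scalar case needs no stack spills: keeping the scalar in
; fa0 leaves enough vector registers for both halves.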

define <32 x double> @vfwmul_vf_v32f32(ptr %x, float %y) {
; CHECK-LABEL: vfwmul_vf_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v24, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v24, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfwmul.vf v16, v8, fa0
; CHECK-NEXT:    vfwmul.vf v8, v24, fa0
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = insertelement <32 x float> poison, float %y, i32 0
  %c = shufflevector <32 x float> %b, <32 x float> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x float> %a to <32 x double>
  %e = fpext <32 x float> %c to <32 x double>
  %f = fmul <32 x double> %d, %e
  ret <32 x double> %f
}
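
; When both fmul operands are the same extended value, a single source
; register feeds both vfwmul.vv inputs.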

define <2 x float> @vfwmul_squared_v2f16_v2f32(ptr %x) {
; CHECK-LABEL: vfwmul_squared_v2f16_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwmul.vv v8, v9, v9
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = fpext <2 x half> %a to <2 x float>
  %c = fmul <2 x float> %b, %b
  ret <2 x float> %c
}

define <2 x double> @vfwmul_squared_v2f32_v2f64(ptr %x) {
; CHECK-LABEL: vfwmul_squared_v2f32_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfwmul.vv v8, v9, v9
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = fpext <2 x float> %a to <2 x double>
  %c = fmul <2 x double> %b, %b
  ret <2 x double> %c
}
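
; half -> double requires two widening steps, so only the multiply itself is
; widening: the source is first promoted with vfwcvt.f.f.v, then vfwmul.vv
; performs the second step.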

define <2 x double> @vfwmul_squared_v2f16_v2f64(ptr %x) {
; CHECK-LABEL: vfwmul_squared_v2f16_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vfwmul.vv v8, v9, v9
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = fpext <2 x half> %a to <2 x double>
  %c = fmul <2 x double> %b, %b
  ret <2 x double> %c
}
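
; Here the splat is of an already-extended scalar (fpext half %y to float);
; the fold still applies, using vfwmul.vf on the original half value in fa0.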

define <2 x float> @vfwmul_vf2_v2f32(<2 x half> %x, half %y) {
; CHECK-LABEL: vfwmul_vf2_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
; CHECK-NEXT:    vmv1r.v v8, v9
; CHECK-NEXT:    ret
  %a = fpext <2 x half> %x to <2 x float>
  %b = fpext half %y to float
  %c = insertelement <2 x float> poison, float %b, i32 0
  %d = shufflevector <2 x float> %c, <2 x float> poison, <2 x i32> zeroinitializer
  %e = fmul <2 x float> %a, %d
  ret <2 x float> %e
}