1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+f,+d -target-abi=ilp32d \
3 ; RUN: -verify-machineinstrs < %s | FileCheck %s
4 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+f,+d -target-abi=lp64d \
5 ; RUN: -verify-machineinstrs < %s | FileCheck %s
define <2 x float> @vfwadd_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = load <2 x half>, ptr %y
  %c = fpext <2 x half> %a to <2 x float>
  %d = fpext <2 x half> %b to <2 x float>
  %e = fadd <2 x float> %c, %d
  ret <2 x float> %e
}
define <4 x float> @vfwadd_v4f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = load <4 x half>, ptr %y
  %c = fpext <4 x half> %a to <4 x float>
  %d = fpext <4 x half> %b to <4 x float>
  %e = fadd <4 x float> %c, %d
  ret <4 x float> %e
}
define <8 x float> @vfwadd_v8f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vle16.v v11, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fpext <8 x half> %a to <8 x float>
  %d = fpext <8 x half> %b to <8 x float>
  %e = fadd <8 x float> %c, %d
  ret <8 x float> %e
}
define <16 x float> @vfwadd_v16f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vle16.v v14, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = load <16 x half>, ptr %y
  %c = fpext <16 x half> %a to <16 x float>
  %d = fpext <16 x half> %b to <16 x float>
  %e = fadd <16 x float> %c, %d
  ret <16 x float> %e
}
define <32 x float> @vfwadd_v32f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vle16.v v20, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = load <32 x half>, ptr %y
  %c = fpext <32 x half> %a to <32 x float>
  %d = fpext <32 x half> %b to <32 x float>
  %e = fadd <32 x float> %c, %d
  ret <32 x float> %e
}
define <64 x float> @vfwadd_v64f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 64
; CHECK-NEXT:    vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle16.v v0, (a1)
; CHECK-NEXT:    li a0, 32
; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
; CHECK-NEXT:    vslidedown.vx v16, v8, a0
; CHECK-NEXT:    vslidedown.vx v8, v0, a0
; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vfwadd.vv v8, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwadd.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <64 x half>, ptr %x
  %b = load <64 x half>, ptr %y
  %c = fpext <64 x half> %a to <64 x float>
  %d = fpext <64 x half> %b to <64 x float>
  %e = fadd <64 x float> %c, %d
  ret <64 x float> %e
}
define <2 x double> @vfwadd_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v9, v10
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fpext <2 x float> %a to <2 x double>
  %d = fpext <2 x float> %b to <2 x double>
  %e = fadd <2 x double> %c, %d
  ret <2 x double> %e
}
define <4 x double> @vfwadd_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vle32.v v11, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v10, v11
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fpext <4 x float> %a to <4 x double>
  %d = fpext <4 x float> %b to <4 x double>
  %e = fadd <4 x double> %c, %d
  ret <4 x double> %e
}
define <8 x double> @vfwadd_v8f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vle32.v v14, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v12, v14
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = load <8 x float>, ptr %y
  %c = fpext <8 x float> %a to <8 x double>
  %d = fpext <8 x float> %b to <8 x double>
  %e = fadd <8 x double> %c, %d
  ret <8 x double> %e
}
define <16 x double> @vfwadd_v16f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vle32.v v20, (a1)
; CHECK-NEXT:    vfwadd.vv v8, v16, v20
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = load <16 x float>, ptr %y
  %c = fpext <16 x float> %a to <16 x double>
  %d = fpext <16 x float> %b to <16 x double>
  %e = fadd <16 x double> %c, %d
  ret <16 x double> %e
}
define <32 x double> @vfwadd_v32f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a2, vlenb
; CHECK-NEXT:    slli a2, a2, 4
; CHECK-NEXT:    sub sp, sp, a2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    vle32.v v0, (a1)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 16
; CHECK-NEXT:    vslidedown.vi v8, v0, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vmv4r.v v24, v8
; CHECK-NEXT:    vfwadd.vv v8, v16, v24
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT:    addi a0, sp, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    vfwadd.vv v8, v16, v0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    add a0, sp, a0
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = load <32 x float>, ptr %y
  %c = fpext <32 x float> %a to <32 x double>
  %d = fpext <32 x float> %b to <32 x double>
  %e = fadd <32 x double> %c, %d
  ret <32 x double> %e
}
define <2 x float> @vfwadd_vf_v2f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x half>, ptr %x
  %b = insertelement <2 x half> poison, half %y, i32 0
  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x half> %a to <2 x float>
  %e = fpext <2 x half> %c to <2 x float>
  %f = fadd <2 x float> %d, %e
  ret <2 x float> %f
}
define <4 x float> @vfwadd_vf_v4f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = insertelement <4 x half> poison, half %y, i32 0
  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x half> %a to <4 x float>
  %e = fpext <4 x half> %c to <4 x float>
  %f = fadd <4 x float> %d, %e
  ret <4 x float> %f
}
define <8 x float> @vfwadd_vf_v8f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v10, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x half> %a to <8 x float>
  %e = fpext <8 x half> %c to <8 x float>
  %f = fadd <8 x float> %d, %e
  ret <8 x float> %f
}
define <16 x float> @vfwadd_vf_v16f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle16.v v12, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %x
  %b = insertelement <16 x half> poison, half %y, i32 0
  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x half> %a to <16 x float>
  %e = fpext <16 x half> %c to <16 x float>
  %f = fadd <16 x float> %d, %e
  ret <16 x float> %f
}
define <32 x float> @vfwadd_vf_v32f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_vf_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vle16.v v16, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <32 x half>, ptr %x
  %b = insertelement <32 x half> poison, half %y, i32 0
  %c = shufflevector <32 x half> %b, <32 x half> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x half> %a to <32 x float>
  %e = fpext <32 x half> %c to <32 x float>
  %f = fadd <32 x float> %d, %e
  ret <32 x float> %f
}
define <2 x double> @vfwadd_vf_v2f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v9, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v9, fa0
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = insertelement <2 x float> poison, float %y, i32 0
  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x float> %a to <2 x double>
  %e = fpext <2 x float> %c to <2 x double>
  %f = fadd <2 x double> %d, %e
  ret <2 x double> %f
}
define <4 x double> @vfwadd_vf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v10, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v10, fa0
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x float> %a to <4 x double>
  %e = fpext <4 x float> %c to <4 x double>
  %f = fadd <4 x double> %d, %e
  ret <4 x double> %f
}
define <8 x double> @vfwadd_vf_v8f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v12, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v12, fa0
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x float> poison, float %y, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x float> %a to <8 x double>
  %e = fpext <8 x float> %c to <8 x double>
  %f = fadd <8 x double> %d, %e
  ret <8 x double> %f
}
define <16 x double> @vfwadd_vf_v16f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v16, (a0)
; CHECK-NEXT:    vfwadd.vf v8, v16, fa0
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = insertelement <16 x float> poison, float %y, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x float> %a to <16 x double>
  %e = fpext <16 x float> %c to <16 x double>
  %f = fadd <16 x double> %d, %e
  ret <16 x double> %f
}
define <32 x double> @vfwadd_vf_v32f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_vf_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vle32.v v24, (a0)
; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v24, 16
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vfmv.v.f v16, fa0
; CHECK-NEXT:    vfwcvt.f.f.v v8, v16
; CHECK-NEXT:    vfwadd.wv v16, v8, v0
; CHECK-NEXT:    vfwadd.wv v8, v8, v24
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = insertelement <32 x float> poison, float %y, i32 0
  %c = shufflevector <32 x float> %b, <32 x float> poison, <32 x i32> zeroinitializer
  %d = fpext <32 x float> %a to <32 x double>
  %e = fpext <32 x float> %c to <32 x double>
  %f = fadd <32 x double> %d, %e
  ret <32 x double> %f
}
define <2 x float> @vfwadd_wv_v2f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v9
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = load <2 x half>, ptr %y
  %c = fpext <2 x half> %b to <2 x float>
  %d = fadd <2 x float> %c, %a
  ret <2 x float> %d
}
define <4 x float> @vfwadd_wv_v4f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v9
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x half>, ptr %y
  %c = fpext <4 x half> %b to <4 x float>
  %d = fadd <4 x float> %c, %a
  ret <4 x float> %d
}
define <8 x float> @vfwadd_wv_v8f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v10, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v10
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fpext <8 x half> %b to <8 x float>
  %d = fadd <8 x float> %c, %a
  ret <8 x float> %d
}
define <16 x float> @vfwadd_wv_v16f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v12, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v12
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = load <16 x half>, ptr %y
  %c = fpext <16 x half> %b to <16 x float>
  %d = fadd <16 x float> %c, %a
  ret <16 x float> %d
}
define <32 x float> @vfwadd_wv_v32f16(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a2, 32
; CHECK-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle16.v v16, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v16
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %x
  %b = load <32 x half>, ptr %y
  %c = fpext <32 x half> %b to <32 x float>
  %d = fadd <32 x float> %c, %a
  ret <32 x float> %d
}
define <2 x double> @vfwadd_wv_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v9
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fpext <2 x float> %b to <2 x double>
  %d = fadd <2 x double> %c, %a
  ret <2 x double> %d
}
define <4 x double> @vfwadd_wv_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v10, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v10
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fpext <4 x float> %b to <4 x double>
  %d = fadd <4 x double> %c, %a
  ret <4 x double> %d
}
define <8 x double> @vfwadd_wv_v8f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v12, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v12
; CHECK-NEXT:    ret
  %a = load <8 x double>, ptr %x
  %b = load <8 x float>, ptr %y
  %c = fpext <8 x float> %b to <8 x double>
  %d = fadd <8 x double> %c, %a
  ret <8 x double> %d
}
define <16 x double> @vfwadd_wv_v16f32(ptr %x, ptr %y) {
; CHECK-LABEL: vfwadd_wv_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle32.v v16, (a1)
; CHECK-NEXT:    vfwadd.wv v8, v8, v16
; CHECK-NEXT:    ret
  %a = load <16 x double>, ptr %x
  %b = load <16 x float>, ptr %y
  %c = fpext <16 x float> %b to <16 x double>
  %d = fadd <16 x double> %c, %a
  ret <16 x double> %d
}
define <2 x float> @vfwadd_wf_v2f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %x
  %b = insertelement <2 x half> poison, half %y, i32 0
  %c = shufflevector <2 x half> %b, <2 x half> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x half> %c to <2 x float>
  %e = fadd <2 x float> %d, %a
  ret <2 x float> %e
}
define <4 x float> @vfwadd_wf_v4f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x half> poison, half %y, i32 0
  %c = shufflevector <4 x half> %b, <4 x half> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x half> %c to <4 x float>
  %e = fadd <4 x float> %d, %a
  ret <4 x float> %e
}
define <8 x float> @vfwadd_wf_v8f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x half> %c to <8 x float>
  %e = fadd <8 x float> %d, %a
  ret <8 x float> %e
}
define <16 x float> @vfwadd_wf_v16f16(ptr %x, half %y) {
; CHECK-LABEL: vfwadd_wf_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <16 x float>, ptr %x
  %b = insertelement <16 x half> poison, half %y, i32 0
  %c = shufflevector <16 x half> %b, <16 x half> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x half> %c to <16 x float>
  %e = fadd <16 x float> %d, %a
  ret <16 x float> %e
}
define <2 x double> @vfwadd_wf_v2f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = insertelement <2 x float> poison, float %y, i32 0
  %c = shufflevector <2 x float> %b, <2 x float> poison, <2 x i32> zeroinitializer
  %d = fpext <2 x float> %c to <2 x double>
  %e = fadd <2 x double> %d, %a
  ret <2 x double> %e
}
define <4 x double> @vfwadd_wf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fpext <4 x float> %c to <4 x double>
  %e = fadd <4 x double> %d, %a
  ret <4 x double> %e
}
define <8 x double> @vfwadd_wf_v8f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <8 x double>, ptr %x
  %b = insertelement <8 x float> poison, float %y, i32 0
  %c = shufflevector <8 x float> %b, <8 x float> poison, <8 x i32> zeroinitializer
  %d = fpext <8 x float> %c to <8 x double>
  %e = fadd <8 x double> %d, %a
  ret <8 x double> %e
}
define <16 x double> @vfwadd_wf_v16f32(ptr %x, float %y) {
; CHECK-LABEL: vfwadd_wf_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
; CHECK-NEXT:    ret
  %a = load <16 x double>, ptr %x
  %b = insertelement <16 x float> poison, float %y, i32 0
  %c = shufflevector <16 x float> %b, <16 x float> poison, <16 x i32> zeroinitializer
  %d = fpext <16 x float> %c to <16 x double>
  %e = fadd <16 x double> %d, %a