; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+d,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV32
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfhmin,+zvfhmin,+f,+zvl256b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,ZVFHMIN-RV64
define void @fadd_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fadd_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfadd.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fadd_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fadd <8 x half> %a, %b
  store <8 x half> %c, ptr %x
  ret void
}

define void @fadd_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fadd_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfadd.vv v8, v8, v9
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: fadd_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfadd.vv v8, v8, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: fadd_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfadd.vv v8, v8, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = fadd <6 x half> %a, %b
  store <6 x half> %c, ptr %x
  ret void
}

define void @fadd_v4f32(ptr %x, ptr %y) {
; ZVFH-LABEL: fadd_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vle32.v v9, (a1)
; ZVFH-NEXT:    vfadd.vv v8, v8, v9
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fadd_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v9
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fadd <4 x float> %a, %b
  store <4 x float> %c, ptr %x
  ret void
}

define void @fadd_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = fadd <2 x double> %a, %b
  store <2 x double> %c, ptr %x
  ret void
}
define void @fsub_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fsub_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfsub.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fsub_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fsub <8 x half> %a, %b
  store <8 x half> %c, ptr %x
  ret void
}

define void @fsub_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fsub_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfsub.vv v8, v8, v9
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: fsub_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfsub.vv v8, v8, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: fsub_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfsub.vv v8, v8, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = fsub <6 x half> %a, %b
  store <6 x half> %c, ptr %x
  ret void
}

define void @fsub_v4f32(ptr %x, ptr %y) {
; ZVFH-LABEL: fsub_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vle32.v v9, (a1)
; ZVFH-NEXT:    vfsub.vv v8, v8, v9
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fsub_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vfsub.vv v8, v8, v9
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fsub <4 x float> %a, %b
  store <4 x float> %c, ptr %x
  ret void
}

define void @fsub_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fsub_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfsub.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = fsub <2 x double> %a, %b
  store <2 x double> %c, ptr %x
  ret void
}
define void @fmul_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fmul_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfmul.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fmul_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmul.vv v8, v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fmul <8 x half> %a, %b
  store <8 x half> %c, ptr %x
  ret void
}

define void @fmul_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fmul_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfmul.vv v8, v8, v9
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: fmul_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfmul.vv v8, v8, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: fmul_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfmul.vv v8, v8, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = fmul <6 x half> %a, %b
  store <6 x half> %c, ptr %x
  ret void
}

define void @fmul_v4f32(ptr %x, ptr %y) {
; ZVFH-LABEL: fmul_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vle32.v v9, (a1)
; ZVFH-NEXT:    vfmul.vv v8, v8, v9
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fmul_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vfmul.vv v8, v8, v9
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fmul <4 x float> %a, %b
  store <4 x float> %c, ptr %x
  ret void
}

define void @fmul_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fmul_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfmul.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = fmul <2 x double> %a, %b
  store <2 x double> %c, ptr %x
  ret void
}
define void @fdiv_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fdiv_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfdiv.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fdiv_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfdiv.vv v8, v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fdiv <8 x half> %a, %b
  store <8 x half> %c, ptr %x
  ret void
}

define void @fdiv_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fdiv_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfdiv.vv v8, v8, v9
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: fdiv_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfdiv.vv v8, v8, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: fdiv_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfdiv.vv v8, v8, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = fdiv <6 x half> %a, %b
  store <6 x half> %c, ptr %x
  ret void
}

define void @fdiv_v4f32(ptr %x, ptr %y) {
; ZVFH-LABEL: fdiv_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vle32.v v9, (a1)
; ZVFH-NEXT:    vfdiv.vv v8, v8, v9
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fdiv_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vfdiv.vv v8, v8, v9
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fdiv <4 x float> %a, %b
  store <4 x float> %c, ptr %x
  ret void
}

define void @fdiv_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fdiv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfdiv.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = fdiv <2 x double> %a, %b
  store <2 x double> %c, ptr %x
  ret void
}
define void @fneg_v8f16(ptr %x) {
; ZVFH-LABEL: fneg_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vfneg.v v8, v8
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fneg_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfneg.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = fneg <8 x half> %a
  store <8 x half> %b, ptr %x
  ret void
}

define void @fneg_v6f16(ptr %x) {
; ZVFH-LABEL: fneg_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfneg.v v8, v8
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: fneg_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfneg.v v8, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: fneg_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfneg.v v8, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = fneg <6 x half> %a
  store <6 x half> %b, ptr %x
  ret void
}

define void @fneg_v4f32(ptr %x) {
; ZVFH-LABEL: fneg_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vfneg.v v8, v8
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fneg_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vfneg.v v8, v8
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = fneg <4 x float> %a
  store <4 x float> %b, ptr %x
  ret void
}

define void @fneg_v2f64(ptr %x) {
; CHECK-LABEL: fneg_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfneg.v v8, v8
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = fneg <2 x double> %a
  store <2 x double> %b, ptr %x
  ret void
}
define void @fabs_v8f16(ptr %x) {
; ZVFH-LABEL: fabs_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vfabs.v v8, v8
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fabs_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
  store <8 x half> %b, ptr %x
  ret void
}
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)

define void @fabs_v6f16(ptr %x) {
; ZVFH-LABEL: fabs_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v8, v8
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: fabs_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: fabs_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
  store <6 x half> %b, ptr %x
  ret void
}
declare <6 x half> @llvm.fabs.v6f16(<6 x half>)

define void @fabs_v4f32(ptr %x) {
; ZVFH-LABEL: fabs_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vfabs.v v8, v8
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fabs_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vfabs.v v8, v8
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)

define void @fabs_v2f64(ptr %x) {
; CHECK-LABEL: fabs_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfabs.v v8, v8
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
define void @copysign_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: copysign_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsgnj.vv v8, v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
  store <8 x half> %c, ptr %x
  ret void
}
declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)

define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: copysign_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v8, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: copysign_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v8, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
  store <6 x half> %c, ptr %x
  ret void
}
declare <6 x half> @llvm.copysign.v6f16(<6 x half>, <6 x half>)

define void @copysign_v4f32(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vle32.v v9, (a1)
; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: copysign_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vfsgnj.vv v8, v8, v9
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %c, ptr %x
  ret void
}
declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)

define void @copysign_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: copysign_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
  store <2 x double> %c, ptr %x
  ret void
}
declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
define void @copysign_vf_v8f16(ptr %x, half %y) {
; ZVFH-LABEL: copysign_vf_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vfsgnj.vf v8, v8, fa0
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: copysign_vf_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v9, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsgnj.vv v8, v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
  store <8 x half> %d, ptr %x
  ret void
}

define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFH-LABEL: copysign_vf_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfsgnj.vf v8, v8, fa0
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: copysign_vf_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfmv.v.f v9, fa5
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v9, v8
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: copysign_vf_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfmv.v.f v9, fa5
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v9, v8
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = insertelement <6 x half> poison, half %y, i32 0
  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
  %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c)
  store <6 x half> %d, ptr %x
  ret void
}

define void @copysign_vf_v4f32(ptr %x, float %y) {
; ZVFH-LABEL: copysign_vf_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vfsgnj.vf v8, v8, fa0
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: copysign_vf_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vfsgnj.vf v8, v8, fa0
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
  store <4 x float> %d, ptr %x
  ret void
}

define void @copysign_vf_v2f64(ptr %x, double %y) {
; CHECK-LABEL: copysign_vf_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = insertelement <2 x double> poison, double %y, i32 0
  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
  %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
  store <2 x double> %d, ptr %x
  ret void
}
define void @copysign_neg_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_neg_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: copysign_neg_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfneg.v v8, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsgnj.vv v8, v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fneg <8 x half> %b
  %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
  store <8 x half> %d, ptr %x
  ret void
}

define void @copysign_neg_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_neg_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: copysign_neg_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfneg.v v8, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v8, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: copysign_neg_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfneg.v v8, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v8, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = fneg <6 x half> %b
  %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c)
  store <6 x half> %d, ptr %x
  ret void
}

define void @copysign_neg_v4f32(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_neg_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vle32.v v9, (a1)
; ZVFH-NEXT:    vfsgnjn.vv v8, v8, v9
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: copysign_neg_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vfsgnjn.vv v8, v8, v9
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fneg <4 x float> %b
  %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
  store <4 x float> %d, ptr %x
  ret void
}

define void @copysign_neg_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: copysign_neg_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = fneg <2 x double> %b
  %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
  store <2 x double> %d, ptr %x
  ret void
}
define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_neg_trunc_v4f16_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a1)
; ZVFH-NEXT:    vle16.v v9, (a0)
; ZVFH-NEXT:    vfncvt.f.f.w v10, v8
; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v10
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfneg.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vfsgnj.vv v8, v10, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x half>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fneg <4 x float> %b
  %d = fptrunc <4 x float> %c to <4 x half>
  %e = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %d)
  store <4 x half> %e, ptr %x
  ret void
}
declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)

define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 3, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a1)
; ZVFH-NEXT:    vle16.v v9, (a0)
; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; ZVFH-NEXT:    vfncvt.f.f.w v10, v8
; ZVFH-NEXT:    vfsgnjn.vv v8, v9, v10
; ZVFH-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    addi sp, sp, -16
; ZVFHMIN-RV32-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfneg.v v8, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v8, v10, v8
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    addi a1, sp, 8
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a1)
; ZVFHMIN-RV32-NEXT:    flh fa5, 12(sp)
; ZVFHMIN-RV32-NEXT:    fsh fa5, 4(a0)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    addi sp, sp, 16
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    addi sp, sp, -16
; ZVFHMIN-RV64-NEXT:    .cfi_def_cfa_offset 16
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vle64.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    mv a2, sp
; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a2)
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a2)
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 3, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfneg.v v8, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v8, v10, v8
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    addi a1, sp, 8
; ZVFHMIN-RV64-NEXT:    vse16.v v9, (a1)
; ZVFHMIN-RV64-NEXT:    flh fa5, 12(sp)
; ZVFHMIN-RV64-NEXT:    fsh fa5, 4(a0)
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse32.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    addi sp, sp, 16
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <3 x half>, ptr %x
  %b = load <3 x float>, ptr %y
  %c = fneg <3 x float> %b
  %d = fptrunc <3 x float> %c to <3 x half>
  %e = call <3 x half> @llvm.copysign.v3f16(<3 x half> %a, <3 x half> %d)
  store <3 x half> %e, ptr %x
  ret void
}
declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>)

define void @copysign_neg_ext_v2f64_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a1)
; CHECK-NEXT:    vle64.v v9, (a0)
; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; CHECK-NEXT:    vfsgnjn.vv v8, v9, v10
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x float>, ptr %y
  %c = fneg <2 x float> %b
  %d = fpext <2 x float> %c to <2 x double>
  %e = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %d)
  store <2 x double> %e, ptr %x
  ret void
}
define void @sqrt_v8f16(ptr %x) {
; ZVFH-LABEL: sqrt_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: sqrt_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfsqrt.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
  store <8 x half> %b, ptr %x
  ret void
}
declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)

define void @sqrt_v6f16(ptr %x) {
; ZVFH-LABEL: sqrt_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: sqrt_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfsqrt.v v8, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: sqrt_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfsqrt.v v8, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a)
  store <6 x half> %b, ptr %x
  ret void
}
declare <6 x half> @llvm.sqrt.v6f16(<6 x half>)

define void @sqrt_v4f32(ptr %x) {
; ZVFH-LABEL: sqrt_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vfsqrt.v v8, v8
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: sqrt_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vfsqrt.v v8, v8
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)

define void @sqrt_v2f64(ptr %x) {
; CHECK-LABEL: sqrt_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfsqrt.v v8, v8
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
define void @fma_v8f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fma_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vle16.v v10, (a2)
; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
; ZVFH-NEXT:    vse16.v v10, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fma_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a2)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vle16.v v10, (a1)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v9, v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = load <8 x half>, ptr %z
  %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  store <8 x half> %d, ptr %x
  ret void
}
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)

define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fma_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vle16.v v10, (a2)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v10, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: fma_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a2)
; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    vle16.v v10, (a1)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfmadd.vv v9, v8, v11
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: fma_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a2)
; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vle16.v v10, (a1)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfmadd.vv v9, v8, v11
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = load <6 x half>, ptr %z
  %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
  store <6 x half> %d, ptr %x
  ret void
}
declare <6 x half> @llvm.fma.v6f16(<6 x half>, <6 x half>, <6 x half>)

define void @fma_v4f32(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fma_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vle32.v v9, (a1)
; ZVFH-NEXT:    vle32.v v10, (a2)
; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
; ZVFH-NEXT:    vse32.v v10, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fma_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vle32.v v10, (a2)
; ZVFHMIN-NEXT:    vfmacc.vv v10, v8, v9
; ZVFHMIN-NEXT:    vse32.v v10, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = load <4 x float>, ptr %z
  %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  store <4 x float> %d, ptr %x
  ret void
}
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)

define void @fma_v2f64(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fma_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vle64.v v10, (a2)
; CHECK-NEXT:    vfmacc.vv v10, v8, v9
; CHECK-NEXT:    vse64.v v10, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = load <2 x double>, ptr %z
  %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
  store <2 x double> %d, ptr %x
  ret void
}
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
1565 define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
1566 ; ZVFH-LABEL: fmsub_v8f16:
1568 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1569 ; ZVFH-NEXT: vle16.v v8, (a0)
1570 ; ZVFH-NEXT: vle16.v v9, (a1)
1571 ; ZVFH-NEXT: vle16.v v10, (a2)
1572 ; ZVFH-NEXT: vfmsac.vv v10, v8, v9
1573 ; ZVFH-NEXT: vse16.v v10, (a0)
1576 ; ZVFHMIN-LABEL: fmsub_v8f16:
1578 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1579 ; ZVFHMIN-NEXT: vle16.v v8, (a2)
1580 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1581 ; ZVFHMIN-NEXT: vle16.v v10, (a1)
1582 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
1583 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1584 ; ZVFHMIN-NEXT: vfneg.v v8, v11
1585 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1586 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
1587 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
1588 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
1589 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v11
1590 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1591 ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
1592 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1593 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
1594 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
1596 %a = load <8 x half>, ptr %x
1597 %b = load <8 x half>, ptr %y
1598 %c = load <8 x half>, ptr %z
1599 %neg = fneg <8 x half> %c
1600 %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
1601 store <8 x half> %d, ptr %x
1605 define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
1606 ; ZVFH-LABEL: fmsub_v6f16:
1608 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1609 ; ZVFH-NEXT: vle16.v v8, (a0)
1610 ; ZVFH-NEXT: vle16.v v9, (a1)
1611 ; ZVFH-NEXT: vle16.v v10, (a2)
1612 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1613 ; ZVFH-NEXT: vfmsac.vv v10, v8, v9
1614 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1615 ; ZVFH-NEXT: vse16.v v10, (a0)
1616 ; ZVFH-NEXT: ret
1617 ;
1618 ; ZVFHMIN-RV32-LABEL: fmsub_v6f16:
1619 ; ZVFHMIN-RV32: # %bb.0:
1620 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1621 ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a2)
1622 ; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
1623 ; ZVFHMIN-RV32-NEXT: vle16.v v10, (a1)
1624 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8
1625 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1626 ; ZVFHMIN-RV32-NEXT: vfneg.v v8, v11
1627 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1628 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v11, v8
1629 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
1630 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
1631 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v10, v11
1632 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1633 ; ZVFHMIN-RV32-NEXT: vfmacc.vv v10, v8, v9
1634 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1635 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v8, v10
1636 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1637 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v9, v8, 2
1638 ; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
1639 ; ZVFHMIN-RV32-NEXT: vse32.v v9, (a1)
1640 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
1641 ; ZVFHMIN-RV32-NEXT: vse16.v v8, (a0)
1642 ; ZVFHMIN-RV32-NEXT: ret
1643 ;
1644 ; ZVFHMIN-RV64-LABEL: fmsub_v6f16:
1645 ; ZVFHMIN-RV64: # %bb.0:
1646 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
1647 ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a2)
1648 ; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
1649 ; ZVFHMIN-RV64-NEXT: vle16.v v10, (a1)
1650 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8
1651 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1652 ; ZVFHMIN-RV64-NEXT: vfneg.v v8, v11
1653 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1654 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v11, v8
1655 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
1656 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
1657 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v10, v11
1658 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
1659 ; ZVFHMIN-RV64-NEXT: vfmacc.vv v10, v8, v9
1660 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1661 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v8, v10
1662 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
1663 ; ZVFHMIN-RV64-NEXT: vse64.v v8, (a0)
1664 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v8, 2
1665 ; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
1666 ; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
1667 ; ZVFHMIN-RV64-NEXT: ret
1668 %a = load <6 x half>, ptr %x
1669 %b = load <6 x half>, ptr %y
1670 %c = load <6 x half>, ptr %z
1671 %neg = fneg <6 x half> %c
1672 %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
1673 store <6 x half> %d, ptr %x
1674 ret void
1675 }
1677 define void @fnmsub_v4f32(ptr %x, ptr %y, ptr %z) {
1678 ; ZVFH-LABEL: fnmsub_v4f32:
1679 ; ZVFH: # %bb.0:
1680 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1681 ; ZVFH-NEXT: vle32.v v8, (a0)
1682 ; ZVFH-NEXT: vle32.v v9, (a1)
1683 ; ZVFH-NEXT: vle32.v v10, (a2)
1684 ; ZVFH-NEXT: vfnmsac.vv v10, v8, v9
1685 ; ZVFH-NEXT: vse32.v v10, (a0)
1686 ; ZVFH-NEXT: ret
1687 ;
1688 ; ZVFHMIN-LABEL: fnmsub_v4f32:
1689 ; ZVFHMIN: # %bb.0:
1690 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
1691 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
1692 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
1693 ; ZVFHMIN-NEXT: vle32.v v10, (a2)
1694 ; ZVFHMIN-NEXT: vfnmsac.vv v10, v8, v9
1695 ; ZVFHMIN-NEXT: vse32.v v10, (a0)
1696 ; ZVFHMIN-NEXT: ret
1697 %a = load <4 x float>, ptr %x
1698 %b = load <4 x float>, ptr %y
1699 %c = load <4 x float>, ptr %z
1700 %neg = fneg <4 x float> %a
1701 %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
1702 store <4 x float> %d, ptr %x
1703 ret void
1704 }
1706 define void @fnmadd_v2f64(ptr %x, ptr %y, ptr %z) {
1707 ; CHECK-LABEL: fnmadd_v2f64:
1708 ; CHECK: # %bb.0:
1709 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1710 ; CHECK-NEXT: vle64.v v8, (a0)
1711 ; CHECK-NEXT: vle64.v v9, (a1)
1712 ; CHECK-NEXT: vle64.v v10, (a2)
1713 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9
1714 ; CHECK-NEXT: vse64.v v10, (a0)
1715 ; CHECK-NEXT: ret
1716 %a = load <2 x double>, ptr %x
1717 %b = load <2 x double>, ptr %y
1718 %c = load <2 x double>, ptr %z
1719 %neg = fneg <2 x double> %b
1720 %neg2 = fneg <2 x double> %c
1721 %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
1722 store <2 x double> %d, ptr %x
1723 ret void
1724 }
1726 define void @fadd_v16f16(ptr %x, ptr %y) {
1727 ; ZVFH-LABEL: fadd_v16f16:
1728 ; ZVFH: # %bb.0:
1729 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1730 ; ZVFH-NEXT: vle16.v v8, (a0)
1731 ; ZVFH-NEXT: vle16.v v10, (a1)
1732 ; ZVFH-NEXT: vfadd.vv v8, v8, v10
1733 ; ZVFH-NEXT: vse16.v v8, (a0)
1734 ; ZVFH-NEXT: ret
1735 ;
1736 ; ZVFHMIN-LABEL: fadd_v16f16:
1737 ; ZVFHMIN: # %bb.0:
1738 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma
1739 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
1740 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1741 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
1742 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
1743 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1744 ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10
1745 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1746 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
1747 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
1748 ; ZVFHMIN-NEXT: ret
1749 %a = load <16 x half>, ptr %x
1750 %b = load <16 x half>, ptr %y
1751 %c = fadd <16 x half> %a, %b
1752 store <16 x half> %c, ptr %x
1753 ret void
1754 }
1756 define void @fadd_v8f32(ptr %x, ptr %y) {
1757 ; ZVFH-LABEL: fadd_v8f32:
1758 ; ZVFH: # %bb.0:
1759 ; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1760 ; ZVFH-NEXT: vle32.v v8, (a0)
1761 ; ZVFH-NEXT: vle32.v v10, (a1)
1762 ; ZVFH-NEXT: vfadd.vv v8, v8, v10
1763 ; ZVFH-NEXT: vse32.v v8, (a0)
1764 ; ZVFH-NEXT: ret
1765 ;
1766 ; ZVFHMIN-LABEL: fadd_v8f32:
1767 ; ZVFHMIN: # %bb.0:
1768 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
1769 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
1770 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
1771 ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9
1772 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
1773 ; ZVFHMIN-NEXT: ret
1774 %a = load <8 x float>, ptr %x
1775 %b = load <8 x float>, ptr %y
1776 %c = fadd <8 x float> %a, %b
1777 store <8 x float> %c, ptr %x
1778 ret void
1779 }
1781 define void @fadd_v4f64(ptr %x, ptr %y) {
1782 ; ZVFH-LABEL: fadd_v4f64:
1783 ; ZVFH: # %bb.0:
1784 ; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1785 ; ZVFH-NEXT: vle64.v v8, (a0)
1786 ; ZVFH-NEXT: vle64.v v10, (a1)
1787 ; ZVFH-NEXT: vfadd.vv v8, v8, v10
1788 ; ZVFH-NEXT: vse64.v v8, (a0)
1789 ; ZVFH-NEXT: ret
1790 ;
1791 ; ZVFHMIN-LABEL: fadd_v4f64:
1792 ; ZVFHMIN: # %bb.0:
1793 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
1794 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
1795 ; ZVFHMIN-NEXT: vle64.v v9, (a1)
1796 ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9
1797 ; ZVFHMIN-NEXT: vse64.v v8, (a0)
1798 ; ZVFHMIN-NEXT: ret
1799 %a = load <4 x double>, ptr %x
1800 %b = load <4 x double>, ptr %y
1801 %c = fadd <4 x double> %a, %b
1802 store <4 x double> %c, ptr %x
1803 ret void
1804 }
1806 define void @fsub_v16f16(ptr %x, ptr %y) {
1807 ; ZVFH-LABEL: fsub_v16f16:
1808 ; ZVFH: # %bb.0:
1809 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1810 ; ZVFH-NEXT: vle16.v v8, (a0)
1811 ; ZVFH-NEXT: vle16.v v10, (a1)
1812 ; ZVFH-NEXT: vfsub.vv v8, v8, v10
1813 ; ZVFH-NEXT: vse16.v v8, (a0)
1814 ; ZVFH-NEXT: ret
1815 ;
1816 ; ZVFHMIN-LABEL: fsub_v16f16:
1817 ; ZVFHMIN: # %bb.0:
1818 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma
1819 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
1820 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1821 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
1822 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
1823 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1824 ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
1825 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1826 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
1827 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
1828 ; ZVFHMIN-NEXT: ret
1829 %a = load <16 x half>, ptr %x
1830 %b = load <16 x half>, ptr %y
1831 %c = fsub <16 x half> %a, %b
1832 store <16 x half> %c, ptr %x
1833 ret void
1834 }
1836 define void @fsub_v8f32(ptr %x, ptr %y) {
1837 ; ZVFH-LABEL: fsub_v8f32:
1838 ; ZVFH: # %bb.0:
1839 ; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1840 ; ZVFH-NEXT: vle32.v v8, (a0)
1841 ; ZVFH-NEXT: vle32.v v10, (a1)
1842 ; ZVFH-NEXT: vfsub.vv v8, v8, v10
1843 ; ZVFH-NEXT: vse32.v v8, (a0)
1844 ; ZVFH-NEXT: ret
1845 ;
1846 ; ZVFHMIN-LABEL: fsub_v8f32:
1847 ; ZVFHMIN: # %bb.0:
1848 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
1849 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
1850 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
1851 ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
1852 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
1853 ; ZVFHMIN-NEXT: ret
1854 %a = load <8 x float>, ptr %x
1855 %b = load <8 x float>, ptr %y
1856 %c = fsub <8 x float> %a, %b
1857 store <8 x float> %c, ptr %x
1858 ret void
1859 }
1861 define void @fsub_v4f64(ptr %x, ptr %y) {
1862 ; ZVFH-LABEL: fsub_v4f64:
1863 ; ZVFH: # %bb.0:
1864 ; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1865 ; ZVFH-NEXT: vle64.v v8, (a0)
1866 ; ZVFH-NEXT: vle64.v v10, (a1)
1867 ; ZVFH-NEXT: vfsub.vv v8, v8, v10
1868 ; ZVFH-NEXT: vse64.v v8, (a0)
1869 ; ZVFH-NEXT: ret
1870 ;
1871 ; ZVFHMIN-LABEL: fsub_v4f64:
1872 ; ZVFHMIN: # %bb.0:
1873 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
1874 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
1875 ; ZVFHMIN-NEXT: vle64.v v9, (a1)
1876 ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
1877 ; ZVFHMIN-NEXT: vse64.v v8, (a0)
1878 ; ZVFHMIN-NEXT: ret
1879 %a = load <4 x double>, ptr %x
1880 %b = load <4 x double>, ptr %y
1881 %c = fsub <4 x double> %a, %b
1882 store <4 x double> %c, ptr %x
1883 ret void
1884 }
1886 define void @fmul_v16f16(ptr %x, ptr %y) {
1887 ; ZVFH-LABEL: fmul_v16f16:
1888 ; ZVFH: # %bb.0:
1889 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1890 ; ZVFH-NEXT: vle16.v v8, (a0)
1891 ; ZVFH-NEXT: vle16.v v10, (a1)
1892 ; ZVFH-NEXT: vfmul.vv v8, v8, v10
1893 ; ZVFH-NEXT: vse16.v v8, (a0)
1894 ; ZVFH-NEXT: ret
1895 ;
1896 ; ZVFHMIN-LABEL: fmul_v16f16:
1897 ; ZVFHMIN: # %bb.0:
1898 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma
1899 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
1900 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1901 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
1902 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
1903 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1904 ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
1905 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1906 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
1907 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
1908 ; ZVFHMIN-NEXT: ret
1909 %a = load <16 x half>, ptr %x
1910 %b = load <16 x half>, ptr %y
1911 %c = fmul <16 x half> %a, %b
1912 store <16 x half> %c, ptr %x
1913 ret void
1914 }
1916 define void @fmul_v8f32(ptr %x, ptr %y) {
1917 ; ZVFH-LABEL: fmul_v8f32:
1918 ; ZVFH: # %bb.0:
1919 ; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1920 ; ZVFH-NEXT: vle32.v v8, (a0)
1921 ; ZVFH-NEXT: vle32.v v10, (a1)
1922 ; ZVFH-NEXT: vfmul.vv v8, v8, v10
1923 ; ZVFH-NEXT: vse32.v v8, (a0)
1924 ; ZVFH-NEXT: ret
1925 ;
1926 ; ZVFHMIN-LABEL: fmul_v8f32:
1927 ; ZVFHMIN: # %bb.0:
1928 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
1929 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
1930 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
1931 ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9
1932 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
1933 ; ZVFHMIN-NEXT: ret
1934 %a = load <8 x float>, ptr %x
1935 %b = load <8 x float>, ptr %y
1936 %c = fmul <8 x float> %a, %b
1937 store <8 x float> %c, ptr %x
1938 ret void
1939 }
1941 define void @fmul_v4f64(ptr %x, ptr %y) {
1942 ; ZVFH-LABEL: fmul_v4f64:
1943 ; ZVFH: # %bb.0:
1944 ; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1945 ; ZVFH-NEXT: vle64.v v8, (a0)
1946 ; ZVFH-NEXT: vle64.v v10, (a1)
1947 ; ZVFH-NEXT: vfmul.vv v8, v8, v10
1948 ; ZVFH-NEXT: vse64.v v8, (a0)
1949 ; ZVFH-NEXT: ret
1950 ;
1951 ; ZVFHMIN-LABEL: fmul_v4f64:
1952 ; ZVFHMIN: # %bb.0:
1953 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
1954 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
1955 ; ZVFHMIN-NEXT: vle64.v v9, (a1)
1956 ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9
1957 ; ZVFHMIN-NEXT: vse64.v v8, (a0)
1958 ; ZVFHMIN-NEXT: ret
1959 %a = load <4 x double>, ptr %x
1960 %b = load <4 x double>, ptr %y
1961 %c = fmul <4 x double> %a, %b
1962 store <4 x double> %c, ptr %x
1963 ret void
1964 }
1966 define void @fdiv_v16f16(ptr %x, ptr %y) {
1967 ; ZVFH-LABEL: fdiv_v16f16:
1968 ; ZVFH: # %bb.0:
1969 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1970 ; ZVFH-NEXT: vle16.v v8, (a0)
1971 ; ZVFH-NEXT: vle16.v v10, (a1)
1972 ; ZVFH-NEXT: vfdiv.vv v8, v8, v10
1973 ; ZVFH-NEXT: vse16.v v8, (a0)
1974 ; ZVFH-NEXT: ret
1975 ;
1976 ; ZVFHMIN-LABEL: fdiv_v16f16:
1977 ; ZVFHMIN: # %bb.0:
1978 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma
1979 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
1980 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1981 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
1982 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
1983 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1984 ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
1985 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1986 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
1987 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
1988 ; ZVFHMIN-NEXT: ret
1989 %a = load <16 x half>, ptr %x
1990 %b = load <16 x half>, ptr %y
1991 %c = fdiv <16 x half> %a, %b
1992 store <16 x half> %c, ptr %x
1993 ret void
1994 }
1996 define void @fdiv_v8f32(ptr %x, ptr %y) {
1997 ; ZVFH-LABEL: fdiv_v8f32:
1998 ; ZVFH: # %bb.0:
1999 ; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2000 ; ZVFH-NEXT: vle32.v v8, (a0)
2001 ; ZVFH-NEXT: vle32.v v10, (a1)
2002 ; ZVFH-NEXT: vfdiv.vv v8, v8, v10
2003 ; ZVFH-NEXT: vse32.v v8, (a0)
2004 ; ZVFH-NEXT: ret
2005 ;
2006 ; ZVFHMIN-LABEL: fdiv_v8f32:
2007 ; ZVFHMIN: # %bb.0:
2008 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
2009 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2010 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
2011 ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9
2012 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
2013 ; ZVFHMIN-NEXT: ret
2014 %a = load <8 x float>, ptr %x
2015 %b = load <8 x float>, ptr %y
2016 %c = fdiv <8 x float> %a, %b
2017 store <8 x float> %c, ptr %x
2018 ret void
2019 }
2021 define void @fdiv_v4f64(ptr %x, ptr %y) {
2022 ; ZVFH-LABEL: fdiv_v4f64:
2023 ; ZVFH: # %bb.0:
2024 ; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2025 ; ZVFH-NEXT: vle64.v v8, (a0)
2026 ; ZVFH-NEXT: vle64.v v10, (a1)
2027 ; ZVFH-NEXT: vfdiv.vv v8, v8, v10
2028 ; ZVFH-NEXT: vse64.v v8, (a0)
2029 ; ZVFH-NEXT: ret
2030 ;
2031 ; ZVFHMIN-LABEL: fdiv_v4f64:
2032 ; ZVFHMIN: # %bb.0:
2033 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
2034 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
2035 ; ZVFHMIN-NEXT: vle64.v v9, (a1)
2036 ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9
2037 ; ZVFHMIN-NEXT: vse64.v v8, (a0)
2038 ; ZVFHMIN-NEXT: ret
2039 %a = load <4 x double>, ptr %x
2040 %b = load <4 x double>, ptr %y
2041 %c = fdiv <4 x double> %a, %b
2042 store <4 x double> %c, ptr %x
2043 ret void
2044 }
2046 define void @fneg_v16f16(ptr %x) {
2047 ; ZVFH-LABEL: fneg_v16f16:
2048 ; ZVFH: # %bb.0:
2049 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2050 ; ZVFH-NEXT: vle16.v v8, (a0)
2051 ; ZVFH-NEXT: vfneg.v v8, v8
2052 ; ZVFH-NEXT: vse16.v v8, (a0)
2053 ; ZVFH-NEXT: ret
2054 ;
2055 ; ZVFHMIN-LABEL: fneg_v16f16:
2056 ; ZVFHMIN: # %bb.0:
2057 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma
2058 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2059 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2060 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2061 ; ZVFHMIN-NEXT: vfneg.v v8, v10
2062 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2063 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2064 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2065 ; ZVFHMIN-NEXT: ret
2066 %a = load <16 x half>, ptr %x
2067 %b = fneg <16 x half> %a
2068 store <16 x half> %b, ptr %x
2069 ret void
2070 }
2072 define void @fneg_v8f32(ptr %x) {
2073 ; ZVFH-LABEL: fneg_v8f32:
2074 ; ZVFH: # %bb.0:
2075 ; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2076 ; ZVFH-NEXT: vle32.v v8, (a0)
2077 ; ZVFH-NEXT: vfneg.v v8, v8
2078 ; ZVFH-NEXT: vse32.v v8, (a0)
2079 ; ZVFH-NEXT: ret
2080 ;
2081 ; ZVFHMIN-LABEL: fneg_v8f32:
2082 ; ZVFHMIN: # %bb.0:
2083 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
2084 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2085 ; ZVFHMIN-NEXT: vfneg.v v8, v8
2086 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
2087 ; ZVFHMIN-NEXT: ret
2088 %a = load <8 x float>, ptr %x
2089 %b = fneg <8 x float> %a
2090 store <8 x float> %b, ptr %x
2091 ret void
2092 }
2094 define void @fneg_v4f64(ptr %x) {
2095 ; ZVFH-LABEL: fneg_v4f64:
2096 ; ZVFH: # %bb.0:
2097 ; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2098 ; ZVFH-NEXT: vle64.v v8, (a0)
2099 ; ZVFH-NEXT: vfneg.v v8, v8
2100 ; ZVFH-NEXT: vse64.v v8, (a0)
2101 ; ZVFH-NEXT: ret
2102 ;
2103 ; ZVFHMIN-LABEL: fneg_v4f64:
2104 ; ZVFHMIN: # %bb.0:
2105 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
2106 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
2107 ; ZVFHMIN-NEXT: vfneg.v v8, v8
2108 ; ZVFHMIN-NEXT: vse64.v v8, (a0)
2109 ; ZVFHMIN-NEXT: ret
2110 %a = load <4 x double>, ptr %x
2111 %b = fneg <4 x double> %a
2112 store <4 x double> %b, ptr %x
2113 ret void
2114 }
2116 define void @fma_v16f16(ptr %x, ptr %y, ptr %z) {
2117 ; ZVFH-LABEL: fma_v16f16:
2118 ; ZVFH: # %bb.0:
2119 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2120 ; ZVFH-NEXT: vle16.v v8, (a0)
2121 ; ZVFH-NEXT: vle16.v v10, (a1)
2122 ; ZVFH-NEXT: vle16.v v12, (a2)
2123 ; ZVFH-NEXT: vfmacc.vv v12, v8, v10
2124 ; ZVFH-NEXT: vse16.v v12, (a0)
2125 ; ZVFH-NEXT: ret
2126 ;
2127 ; ZVFHMIN-LABEL: fma_v16f16:
2128 ; ZVFHMIN: # %bb.0:
2129 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m1, ta, ma
2130 ; ZVFHMIN-NEXT: vle16.v v8, (a2)
2131 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
2132 ; ZVFHMIN-NEXT: vle16.v v10, (a1)
2133 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
2134 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
2135 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
2136 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2137 ; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
2138 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2139 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2140 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2141 ; ZVFHMIN-NEXT: ret
2142 %a = load <16 x half>, ptr %x
2143 %b = load <16 x half>, ptr %y
2144 %c = load <16 x half>, ptr %z
2145 %d = call <16 x half> @llvm.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c)
2146 store <16 x half> %d, ptr %x
2147 ret void
2148 }
2149 declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
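2150 ; The f32/f64 cases below need no conversions under either config; the codegen differs only in LMUL because the zvfhmin RUN lines add zvl256b.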
2151 define void @fma_v8f32(ptr %x, ptr %y, ptr %z) {
2152 ; ZVFH-LABEL: fma_v8f32:
2153 ; ZVFH: # %bb.0:
2154 ; ZVFH-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2155 ; ZVFH-NEXT: vle32.v v8, (a0)
2156 ; ZVFH-NEXT: vle32.v v10, (a1)
2157 ; ZVFH-NEXT: vle32.v v12, (a2)
2158 ; ZVFH-NEXT: vfmacc.vv v12, v8, v10
2159 ; ZVFH-NEXT: vse32.v v12, (a0)
2160 ; ZVFH-NEXT: ret
2161 ;
2162 ; ZVFHMIN-LABEL: fma_v8f32:
2163 ; ZVFHMIN: # %bb.0:
2164 ; ZVFHMIN-NEXT: vsetivli zero, 8, e32, m1, ta, ma
2165 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2166 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
2167 ; ZVFHMIN-NEXT: vle32.v v10, (a2)
2168 ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
2169 ; ZVFHMIN-NEXT: vse32.v v10, (a0)
2170 ; ZVFHMIN-NEXT: ret
2171 %a = load <8 x float>, ptr %x
2172 %b = load <8 x float>, ptr %y
2173 %c = load <8 x float>, ptr %z
2174 %d = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
2175 store <8 x float> %d, ptr %x
2176 ret void
2177 }
2178 declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
2180 define void @fma_v4f64(ptr %x, ptr %y, ptr %z) {
2181 ; ZVFH-LABEL: fma_v4f64:
2182 ; ZVFH: # %bb.0:
2183 ; ZVFH-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2184 ; ZVFH-NEXT: vle64.v v8, (a0)
2185 ; ZVFH-NEXT: vle64.v v10, (a1)
2186 ; ZVFH-NEXT: vle64.v v12, (a2)
2187 ; ZVFH-NEXT: vfmacc.vv v12, v8, v10
2188 ; ZVFH-NEXT: vse64.v v12, (a0)
2189 ; ZVFH-NEXT: ret
2190 ;
2191 ; ZVFHMIN-LABEL: fma_v4f64:
2192 ; ZVFHMIN: # %bb.0:
2193 ; ZVFHMIN-NEXT: vsetivli zero, 4, e64, m1, ta, ma
2194 ; ZVFHMIN-NEXT: vle64.v v8, (a0)
2195 ; ZVFHMIN-NEXT: vle64.v v9, (a1)
2196 ; ZVFHMIN-NEXT: vle64.v v10, (a2)
2197 ; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
2198 ; ZVFHMIN-NEXT: vse64.v v10, (a0)
2199 ; ZVFHMIN-NEXT: ret
2200 %a = load <4 x double>, ptr %x
2201 %b = load <4 x double>, ptr %y
2202 %c = load <4 x double>, ptr %z
2203 %d = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
2204 store <4 x double> %d, ptr %x
2205 ret void
2206 }
2207 declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
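2208 ; The vf/fv tests splat a scalar operand: zvfh folds it into the .vf instruction form, while zvfhmin converts the scalar to f32 (fcvt.s.h), splats it with vfmv.v.f, and narrows it to f16 before re-widening for the op.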
2209 define void @fadd_vf_v8f16(ptr %x, half %y) {
2210 ; ZVFH-LABEL: fadd_vf_v8f16:
2211 ; ZVFH: # %bb.0:
2212 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2213 ; ZVFH-NEXT: vle16.v v8, (a0)
2214 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
2215 ; ZVFH-NEXT: vse16.v v8, (a0)
2216 ; ZVFH-NEXT: ret
2217 ;
2218 ; ZVFHMIN-LABEL: fadd_vf_v8f16:
2219 ; ZVFHMIN: # %bb.0:
2220 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2221 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2222 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
2223 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2224 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
2225 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2226 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
2227 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2228 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
2229 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
2230 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2231 ; ZVFHMIN-NEXT: vfadd.vv v8, v9, v8
2232 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2233 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
2234 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
2235 ; ZVFHMIN-NEXT: ret
2236 %a = load <8 x half>, ptr %x
2237 %b = insertelement <8 x half> poison, half %y, i32 0
2238 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2239 %d = fadd <8 x half> %a, %c
2240 store <8 x half> %d, ptr %x
2241 ret void
2242 }
2244 define void @fadd_vf_v6f16(ptr %x, half %y) {
2245 ; ZVFH-LABEL: fadd_vf_v6f16:
2246 ; ZVFH: # %bb.0:
2247 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2248 ; ZVFH-NEXT: vle16.v v8, (a0)
2249 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2250 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
2251 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2252 ; ZVFH-NEXT: vse16.v v8, (a0)
2253 ; ZVFH-NEXT: ret
2254 ;
2255 ; ZVFHMIN-RV32-LABEL: fadd_vf_v6f16:
2256 ; ZVFHMIN-RV32: # %bb.0:
2257 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2258 ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
2259 ; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
2260 ; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2261 ; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
2262 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2263 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
2264 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2265 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
2266 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
2267 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2268 ; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v9, v8
2269 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2270 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
2271 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2272 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
2273 ; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
2274 ; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
2275 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
2276 ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
2277 ; ZVFHMIN-RV32-NEXT: ret
2278 ;
2279 ; ZVFHMIN-RV64-LABEL: fadd_vf_v6f16:
2280 ; ZVFHMIN-RV64: # %bb.0:
2281 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2282 ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
2283 ; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
2284 ; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2285 ; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
2286 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2287 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
2288 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2289 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
2290 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
2291 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2292 ; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v9, v8
2293 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2294 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
2295 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2296 ; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
2297 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
2298 ; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
2299 ; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
2300 ; ZVFHMIN-RV64-NEXT: ret
2301 %a = load <6 x half>, ptr %x
2302 %b = insertelement <6 x half> poison, half %y, i32 0
2303 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2304 %d = fadd <6 x half> %a, %c
2305 store <6 x half> %d, ptr %x
2306 ret void
2307 }
2309 define void @fadd_vf_v4f32(ptr %x, float %y) {
2310 ; ZVFH-LABEL: fadd_vf_v4f32:
2311 ; ZVFH: # %bb.0:
2312 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2313 ; ZVFH-NEXT: vle32.v v8, (a0)
2314 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
2315 ; ZVFH-NEXT: vse32.v v8, (a0)
2316 ; ZVFH-NEXT: ret
2317 ;
2318 ; ZVFHMIN-LABEL: fadd_vf_v4f32:
2319 ; ZVFHMIN: # %bb.0:
2320 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
2321 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2322 ; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0
2323 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
2324 ; ZVFHMIN-NEXT: ret
2325 %a = load <4 x float>, ptr %x
2326 %b = insertelement <4 x float> poison, float %y, i32 0
2327 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2328 %d = fadd <4 x float> %a, %c
2329 store <4 x float> %d, ptr %x
2330 ret void
2331 }
2333 define void @fadd_vf_v2f64(ptr %x, double %y) {
2334 ; CHECK-LABEL: fadd_vf_v2f64:
2335 ; CHECK: # %bb.0:
2336 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2337 ; CHECK-NEXT: vle64.v v8, (a0)
2338 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
2339 ; CHECK-NEXT: vse64.v v8, (a0)
2340 ; CHECK-NEXT: ret
2341 %a = load <2 x double>, ptr %x
2342 %b = insertelement <2 x double> poison, double %y, i32 0
2343 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2344 %d = fadd <2 x double> %a, %c
2345 store <2 x double> %d, ptr %x
2346 ret void
2347 }
2349 define void @fadd_fv_v8f16(ptr %x, half %y) {
2350 ; ZVFH-LABEL: fadd_fv_v8f16:
2351 ; ZVFH: # %bb.0:
2352 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2353 ; ZVFH-NEXT: vle16.v v8, (a0)
2354 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
2355 ; ZVFH-NEXT: vse16.v v8, (a0)
2356 ; ZVFH-NEXT: ret
2357 ;
2358 ; ZVFHMIN-LABEL: fadd_fv_v8f16:
2359 ; ZVFHMIN: # %bb.0:
2360 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2361 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2362 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
2363 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2364 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
2365 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2366 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
2367 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2368 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
2369 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
2370 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2371 ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v9
2372 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2373 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
2374 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
2375 ; ZVFHMIN-NEXT: ret
2376 %a = load <8 x half>, ptr %x
2377 %b = insertelement <8 x half> poison, half %y, i32 0
2378 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2379 %d = fadd <8 x half> %c, %a
2380 store <8 x half> %d, ptr %x
2381 ret void
2382 }
2384 define void @fadd_fv_v6f16(ptr %x, half %y) {
2385 ; ZVFH-LABEL: fadd_fv_v6f16:
2386 ; ZVFH: # %bb.0:
2387 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2388 ; ZVFH-NEXT: vle16.v v8, (a0)
2389 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2390 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
2391 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2392 ; ZVFH-NEXT: vse16.v v8, (a0)
2393 ; ZVFH-NEXT: ret
2394 ;
2395 ; ZVFHMIN-RV32-LABEL: fadd_fv_v6f16:
2396 ; ZVFHMIN-RV32: # %bb.0:
2397 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2398 ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
2399 ; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
2400 ; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2401 ; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
2402 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2403 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
2404 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2405 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
2406 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
2407 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2408 ; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v8, v9
2409 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2410 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
2411 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2412 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
2413 ; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
2414 ; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
2415 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
2416 ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
2417 ; ZVFHMIN-RV32-NEXT: ret
2418 ;
2419 ; ZVFHMIN-RV64-LABEL: fadd_fv_v6f16:
2420 ; ZVFHMIN-RV64: # %bb.0:
2421 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2422 ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
2423 ; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
2424 ; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2425 ; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
2426 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2427 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
2428 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2429 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
2430 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
2431 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2432 ; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v8, v9
2433 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2434 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
2435 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2436 ; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
2437 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
2438 ; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
2439 ; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
2440 ; ZVFHMIN-RV64-NEXT: ret
2441 %a = load <6 x half>, ptr %x
2442 %b = insertelement <6 x half> poison, half %y, i32 0
2443 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2444 %d = fadd <6 x half> %c, %a
2445 store <6 x half> %d, ptr %x
2446 ret void
2447 }
2449 define void @fadd_fv_v4f32(ptr %x, float %y) {
2450 ; ZVFH-LABEL: fadd_fv_v4f32:
2451 ; ZVFH: # %bb.0:
2452 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2453 ; ZVFH-NEXT: vle32.v v8, (a0)
2454 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
2455 ; ZVFH-NEXT: vse32.v v8, (a0)
2456 ; ZVFH-NEXT: ret
2457 ;
2458 ; ZVFHMIN-LABEL: fadd_fv_v4f32:
2459 ; ZVFHMIN: # %bb.0:
2460 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
2461 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2462 ; ZVFHMIN-NEXT: vfadd.vf v8, v8, fa0
2463 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
2464 ; ZVFHMIN-NEXT: ret
2465 %a = load <4 x float>, ptr %x
2466 %b = insertelement <4 x float> poison, float %y, i32 0
2467 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2468 %d = fadd <4 x float> %c, %a
2469 store <4 x float> %d, ptr %x
2470 ret void
2471 }
2473 define void @fadd_fv_v2f64(ptr %x, double %y) {
2474 ; CHECK-LABEL: fadd_fv_v2f64:
2475 ; CHECK: # %bb.0:
2476 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2477 ; CHECK-NEXT: vle64.v v8, (a0)
2478 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
2479 ; CHECK-NEXT: vse64.v v8, (a0)
2480 ; CHECK-NEXT: ret
2481 %a = load <2 x double>, ptr %x
2482 %b = insertelement <2 x double> poison, double %y, i32 0
2483 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2484 %d = fadd <2 x double> %c, %a
2485 store <2 x double> %d, ptr %x
2486 ret void
2487 }
2489 define void @fsub_vf_v8f16(ptr %x, half %y) {
2490 ; ZVFH-LABEL: fsub_vf_v8f16:
2491 ; ZVFH: # %bb.0:
2492 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2493 ; ZVFH-NEXT: vle16.v v8, (a0)
2494 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
2495 ; ZVFH-NEXT: vse16.v v8, (a0)
2496 ; ZVFH-NEXT: ret
2497 ;
2498 ; ZVFHMIN-LABEL: fsub_vf_v8f16:
2499 ; ZVFHMIN: # %bb.0:
2500 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2501 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2502 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
2503 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2504 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
2505 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2506 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
2507 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2508 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
2509 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
2510 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2511 ; ZVFHMIN-NEXT: vfsub.vv v8, v9, v8
2512 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2513 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
2514 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
2515 ; ZVFHMIN-NEXT: ret
2516 %a = load <8 x half>, ptr %x
2517 %b = insertelement <8 x half> poison, half %y, i32 0
2518 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2519 %d = fsub <8 x half> %a, %c
2520 store <8 x half> %d, ptr %x
2521 ret void
2522 }
2524 define void @fsub_vf_v6f16(ptr %x, half %y) {
2525 ; ZVFH-LABEL: fsub_vf_v6f16:
2526 ; ZVFH: # %bb.0:
2527 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2528 ; ZVFH-NEXT: vle16.v v8, (a0)
2529 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2530 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
2531 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2532 ; ZVFH-NEXT: vse16.v v8, (a0)
2533 ; ZVFH-NEXT: ret
2534 ;
2535 ; ZVFHMIN-RV32-LABEL: fsub_vf_v6f16:
2536 ; ZVFHMIN-RV32: # %bb.0:
2537 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2538 ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
2539 ; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
2540 ; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2541 ; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
2542 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2543 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
2544 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2545 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
2546 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
2547 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2548 ; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v9, v8
2549 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2550 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
2551 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2552 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
2553 ; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
2554 ; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
2555 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
2556 ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
2557 ; ZVFHMIN-RV32-NEXT: ret
2558 ;
2559 ; ZVFHMIN-RV64-LABEL: fsub_vf_v6f16:
2560 ; ZVFHMIN-RV64: # %bb.0:
2561 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2562 ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
2563 ; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
2564 ; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2565 ; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
2566 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2567 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
2568 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2569 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
2570 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
2571 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2572 ; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v9, v8
2573 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2574 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
2575 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2576 ; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
2577 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
2578 ; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
2579 ; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
2580 ; ZVFHMIN-RV64-NEXT: ret
2581 %a = load <6 x half>, ptr %x
2582 %b = insertelement <6 x half> poison, half %y, i32 0
2583 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2584 %d = fsub <6 x half> %a, %c
2585 store <6 x half> %d, ptr %x
2586 ret void
2587 }
2589 define void @fsub_vf_v4f32(ptr %x, float %y) {
2590 ; ZVFH-LABEL: fsub_vf_v4f32:
2591 ; ZVFH: # %bb.0:
2592 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2593 ; ZVFH-NEXT: vle32.v v8, (a0)
2594 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
2595 ; ZVFH-NEXT: vse32.v v8, (a0)
2596 ; ZVFH-NEXT: ret
2597 ;
2598 ; ZVFHMIN-LABEL: fsub_vf_v4f32:
2599 ; ZVFHMIN: # %bb.0:
2600 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
2601 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2602 ; ZVFHMIN-NEXT: vfsub.vf v8, v8, fa0
2603 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
2604 ; ZVFHMIN-NEXT: ret
2605 %a = load <4 x float>, ptr %x
2606 %b = insertelement <4 x float> poison, float %y, i32 0
2607 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2608 %d = fsub <4 x float> %a, %c
2609 store <4 x float> %d, ptr %x
2610 ret void
2611 }
2613 define void @fsub_vf_v2f64(ptr %x, double %y) {
2614 ; CHECK-LABEL: fsub_vf_v2f64:
2615 ; CHECK: # %bb.0:
2616 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2617 ; CHECK-NEXT: vle64.v v8, (a0)
2618 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
2619 ; CHECK-NEXT: vse64.v v8, (a0)
2620 ; CHECK-NEXT: ret
2621 %a = load <2 x double>, ptr %x
2622 %b = insertelement <2 x double> poison, double %y, i32 0
2623 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2624 %d = fsub <2 x double> %a, %c
2625 store <2 x double> %d, ptr %x
2626 ret void
2627 }
2629 define void @fsub_fv_v8f16(ptr %x, half %y) {
2630 ; ZVFH-LABEL: fsub_fv_v8f16:
2631 ; ZVFH: # %bb.0:
2632 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2633 ; ZVFH-NEXT: vle16.v v8, (a0)
2634 ; ZVFH-NEXT: vfrsub.vf v8, v8, fa0
2635 ; ZVFH-NEXT: vse16.v v8, (a0)
2636 ; ZVFH-NEXT: ret
2637 ;
2638 ; ZVFHMIN-LABEL: fsub_fv_v8f16:
2639 ; ZVFHMIN: # %bb.0:
2640 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2641 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2642 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
2643 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2644 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
2645 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2646 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
2647 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2648 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
2649 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
2650 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2651 ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
2652 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2653 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
2654 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
2655 ; ZVFHMIN-NEXT: ret
2656 %a = load <8 x half>, ptr %x
2657 %b = insertelement <8 x half> poison, half %y, i32 0
2658 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2659 %d = fsub <8 x half> %c, %a
2660 store <8 x half> %d, ptr %x
2661 ret void
2662 }
2664 define void @fsub_fv_v6f16(ptr %x, half %y) {
2665 ; ZVFH-LABEL: fsub_fv_v6f16:
2666 ; ZVFH: # %bb.0:
2667 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2668 ; ZVFH-NEXT: vle16.v v8, (a0)
2669 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2670 ; ZVFH-NEXT: vfrsub.vf v8, v8, fa0
2671 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2672 ; ZVFH-NEXT: vse16.v v8, (a0)
2673 ; ZVFH-NEXT: ret
2674 ;
2675 ; ZVFHMIN-RV32-LABEL: fsub_fv_v6f16:
2676 ; ZVFHMIN-RV32: # %bb.0:
2677 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2678 ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
2679 ; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
2680 ; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2681 ; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
2682 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2683 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
2684 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2685 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
2686 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
2687 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2688 ; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v8, v9
2689 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2690 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
2691 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2692 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
2693 ; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
2694 ; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
2695 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
2696 ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
2697 ; ZVFHMIN-RV32-NEXT: ret
2698 ;
2699 ; ZVFHMIN-RV64-LABEL: fsub_fv_v6f16:
2700 ; ZVFHMIN-RV64: # %bb.0:
2701 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2702 ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
2703 ; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
2704 ; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2705 ; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
2706 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2707 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
2708 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2709 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
2710 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
2711 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2712 ; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v8, v9
2713 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2714 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
2715 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2716 ; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
2717 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
2718 ; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
2719 ; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
2720 ; ZVFHMIN-RV64-NEXT: ret
2721 %a = load <6 x half>, ptr %x
2722 %b = insertelement <6 x half> poison, half %y, i32 0
2723 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2724 %d = fsub <6 x half> %c, %a
2725 store <6 x half> %d, ptr %x
2726 ret void
2727 }
2729 define void @fsub_fv_v4f32(ptr %x, float %y) {
2730 ; ZVFH-LABEL: fsub_fv_v4f32:
2731 ; ZVFH: # %bb.0:
2732 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2733 ; ZVFH-NEXT: vle32.v v8, (a0)
2734 ; ZVFH-NEXT: vfrsub.vf v8, v8, fa0
2735 ; ZVFH-NEXT: vse32.v v8, (a0)
2736 ; ZVFH-NEXT: ret
2737 ;
2738 ; ZVFHMIN-LABEL: fsub_fv_v4f32:
2739 ; ZVFHMIN: # %bb.0:
2740 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
2741 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2742 ; ZVFHMIN-NEXT: vfrsub.vf v8, v8, fa0
2743 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
2744 ; ZVFHMIN-NEXT: ret
2745 %a = load <4 x float>, ptr %x
2746 %b = insertelement <4 x float> poison, float %y, i32 0
2747 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2748 %d = fsub <4 x float> %c, %a
2749 store <4 x float> %d, ptr %x
2750 ret void
2751 }
2753 define void @fsub_fv_v2f64(ptr %x, double %y) {
2754 ; CHECK-LABEL: fsub_fv_v2f64:
2755 ; CHECK: # %bb.0:
2756 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2757 ; CHECK-NEXT: vle64.v v8, (a0)
2758 ; CHECK-NEXT: vfrsub.vf v8, v8, fa0
2759 ; CHECK-NEXT: vse64.v v8, (a0)
2760 ; CHECK-NEXT: ret
2761 %a = load <2 x double>, ptr %x
2762 %b = insertelement <2 x double> poison, double %y, i32 0
2763 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2764 %d = fsub <2 x double> %c, %a
2765 store <2 x double> %d, ptr %x
2766 ret void
2767 }
2769 define void @fmul_vf_v8f16(ptr %x, half %y) {
2770 ; ZVFH-LABEL: fmul_vf_v8f16:
2771 ; ZVFH: # %bb.0:
2772 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2773 ; ZVFH-NEXT: vle16.v v8, (a0)
2774 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
2775 ; ZVFH-NEXT: vse16.v v8, (a0)
2776 ; ZVFH-NEXT: ret
2777 ;
2778 ; ZVFHMIN-LABEL: fmul_vf_v8f16:
2779 ; ZVFHMIN: # %bb.0:
2780 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2781 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2782 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
2783 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2784 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
2785 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2786 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
2787 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2788 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
2789 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
2790 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2791 ; ZVFHMIN-NEXT: vfmul.vv v8, v9, v8
2792 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2793 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
2794 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
2795 ; ZVFHMIN-NEXT: ret
2796 %a = load <8 x half>, ptr %x
2797 %b = insertelement <8 x half> poison, half %y, i32 0
2798 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2799 %d = fmul <8 x half> %a, %c
2800 store <8 x half> %d, ptr %x
2801 ret void
2802 }
2804 define void @fmul_vf_v6f16(ptr %x, half %y) {
2805 ; ZVFH-LABEL: fmul_vf_v6f16:
2806 ; ZVFH: # %bb.0:
2807 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2808 ; ZVFH-NEXT: vle16.v v8, (a0)
2809 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2810 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
2811 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2812 ; ZVFH-NEXT: vse16.v v8, (a0)
2813 ; ZVFH-NEXT: ret
2814 ;
2815 ; ZVFHMIN-RV32-LABEL: fmul_vf_v6f16:
2816 ; ZVFHMIN-RV32: # %bb.0:
2817 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2818 ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
2819 ; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
2820 ; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2821 ; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
2822 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2823 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
2824 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2825 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
2826 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
2827 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2828 ; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v9, v8
2829 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2830 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
2831 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2832 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
2833 ; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
2834 ; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
2835 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
2836 ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
2837 ; ZVFHMIN-RV32-NEXT: ret
2838 ;
2839 ; ZVFHMIN-RV64-LABEL: fmul_vf_v6f16:
2840 ; ZVFHMIN-RV64: # %bb.0:
2841 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2842 ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
2843 ; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
2844 ; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2845 ; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
2846 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2847 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
2848 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2849 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
2850 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
2851 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2852 ; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v9, v8
2853 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2854 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
2855 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2856 ; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
2857 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
2858 ; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
2859 ; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
2860 ; ZVFHMIN-RV64-NEXT: ret
2861 %a = load <6 x half>, ptr %x
2862 %b = insertelement <6 x half> poison, half %y, i32 0
2863 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2864 %d = fmul <6 x half> %a, %c
2865 store <6 x half> %d, ptr %x
2866 ret void
2867 }
2869 define void @fmul_vf_v4f32(ptr %x, float %y) {
2870 ; ZVFH-LABEL: fmul_vf_v4f32:
2871 ; ZVFH: # %bb.0:
2872 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2873 ; ZVFH-NEXT: vle32.v v8, (a0)
2874 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
2875 ; ZVFH-NEXT: vse32.v v8, (a0)
2876 ; ZVFH-NEXT: ret
2877 ;
2878 ; ZVFHMIN-LABEL: fmul_vf_v4f32:
2879 ; ZVFHMIN: # %bb.0:
2880 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
2881 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
2882 ; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0
2883 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
2884 ; ZVFHMIN-NEXT: ret
2885 %a = load <4 x float>, ptr %x
2886 %b = insertelement <4 x float> poison, float %y, i32 0
2887 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2888 %d = fmul <4 x float> %a, %c
2889 store <4 x float> %d, ptr %x
2890 ret void
2891 }
2893 define void @fmul_vf_v2f64(ptr %x, double %y) {
2894 ; CHECK-LABEL: fmul_vf_v2f64:
2895 ; CHECK: # %bb.0:
2896 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2897 ; CHECK-NEXT: vle64.v v8, (a0)
2898 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
2899 ; CHECK-NEXT: vse64.v v8, (a0)
2900 ; CHECK-NEXT: ret
2901 %a = load <2 x double>, ptr %x
2902 %b = insertelement <2 x double> poison, double %y, i32 0
2903 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2904 %d = fmul <2 x double> %a, %c
2905 store <2 x double> %d, ptr %x
2906 ret void
2907 }
2909 define void @fmul_fv_v8f16(ptr %x, half %y) {
2910 ; ZVFH-LABEL: fmul_fv_v8f16:
2911 ; ZVFH: # %bb.0:
2912 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2913 ; ZVFH-NEXT: vle16.v v8, (a0)
2914 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
2915 ; ZVFH-NEXT: vse16.v v8, (a0)
2916 ; ZVFH-NEXT: ret
2917 ;
2918 ; ZVFHMIN-LABEL: fmul_fv_v8f16:
2919 ; ZVFHMIN: # %bb.0:
2920 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2921 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2922 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
2923 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2924 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
2925 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2926 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
2927 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2928 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
2929 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
2930 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2931 ; ZVFHMIN-NEXT: vfmul.vv v8, v8, v9
2932 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2933 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
2934 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
2935 ; ZVFHMIN-NEXT: ret
2936 %a = load <8 x half>, ptr %x
2937 %b = insertelement <8 x half> poison, half %y, i32 0
2938 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2939 %d = fmul <8 x half> %c, %a
2940 store <8 x half> %d, ptr %x
2941 ret void
2942 }
2944 define void @fmul_fv_v6f16(ptr %x, half %y) {
2945 ; ZVFH-LABEL: fmul_fv_v6f16:
2946 ; ZVFH: # %bb.0:
2947 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2948 ; ZVFH-NEXT: vle16.v v8, (a0)
2949 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2950 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
2951 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2952 ; ZVFH-NEXT: vse16.v v8, (a0)
2953 ; ZVFH-NEXT: ret
2954 ;
2955 ; ZVFHMIN-RV32-LABEL: fmul_fv_v6f16:
2956 ; ZVFHMIN-RV32: # %bb.0:
2957 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2958 ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
2959 ; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
2960 ; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2961 ; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
2962 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2963 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
2964 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2965 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
2966 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
2967 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2968 ; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v8, v9
2969 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2970 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
2971 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2972 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
2973 ; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
2974 ; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
2975 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
2976 ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
2977 ; ZVFHMIN-RV32-NEXT: ret
2978 ;
2979 ; ZVFHMIN-RV64-LABEL: fmul_fv_v6f16:
2980 ; ZVFHMIN-RV64: # %bb.0:
2981 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2982 ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
2983 ; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
2984 ; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
2985 ; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
2986 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2987 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
2988 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
2989 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
2990 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
2991 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
2992 ; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v8, v9
2993 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
2994 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
2995 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
2996 ; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
2997 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
2998 ; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
2999 ; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
3000 ; ZVFHMIN-RV64-NEXT: ret
3001 %a = load <6 x half>, ptr %x
3002 %b = insertelement <6 x half> poison, half %y, i32 0
3003 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3004 %d = fmul <6 x half> %c, %a
3005 store <6 x half> %d, ptr %x
3006 ret void
3007 }
3009 define void @fmul_fv_v4f32(ptr %x, float %y) {
3010 ; ZVFH-LABEL: fmul_fv_v4f32:
3011 ; ZVFH: # %bb.0:
3012 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3013 ; ZVFH-NEXT: vle32.v v8, (a0)
3014 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
3015 ; ZVFH-NEXT: vse32.v v8, (a0)
3016 ; ZVFH-NEXT: ret
3017 ;
3018 ; ZVFHMIN-LABEL: fmul_fv_v4f32:
3019 ; ZVFHMIN: # %bb.0:
3020 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
3021 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
3022 ; ZVFHMIN-NEXT: vfmul.vf v8, v8, fa0
3023 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
3024 ; ZVFHMIN-NEXT: ret
3025 %a = load <4 x float>, ptr %x
3026 %b = insertelement <4 x float> poison, float %y, i32 0
3027 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3028 %d = fmul <4 x float> %c, %a
3029 store <4 x float> %d, ptr %x
3030 ret void
3031 }
3033 define void @fmul_fv_v2f64(ptr %x, double %y) {
3034 ; CHECK-LABEL: fmul_fv_v2f64:
3035 ; CHECK: # %bb.0:
3036 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3037 ; CHECK-NEXT: vle64.v v8, (a0)
3038 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
3039 ; CHECK-NEXT: vse64.v v8, (a0)
3040 ; CHECK-NEXT: ret
3041 %a = load <2 x double>, ptr %x
3042 %b = insertelement <2 x double> poison, double %y, i32 0
3043 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3044 %d = fmul <2 x double> %c, %a
3045 store <2 x double> %d, ptr %x
3046 ret void
3047 }
3049 define void @fdiv_vf_v8f16(ptr %x, half %y) {
3050 ; ZVFH-LABEL: fdiv_vf_v8f16:
3051 ; ZVFH: # %bb.0:
3052 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3053 ; ZVFH-NEXT: vle16.v v8, (a0)
3054 ; ZVFH-NEXT: vfdiv.vf v8, v8, fa0
3055 ; ZVFH-NEXT: vse16.v v8, (a0)
3056 ; ZVFH-NEXT: ret
3057 ;
3058 ; ZVFHMIN-LABEL: fdiv_vf_v8f16:
3059 ; ZVFHMIN: # %bb.0:
3060 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3061 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3062 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
3063 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3064 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
3065 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3066 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
3067 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3068 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
3069 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3070 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3071 ; ZVFHMIN-NEXT: vfdiv.vv v8, v9, v8
3072 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3073 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
3074 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
3075 ; ZVFHMIN-NEXT: ret
3076 %a = load <8 x half>, ptr %x
3077 %b = insertelement <8 x half> poison, half %y, i32 0
3078 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3079 %d = fdiv <8 x half> %a, %c
3080 store <8 x half> %d, ptr %x
3081 ret void
3082 }
3084 define void @fdiv_vf_v6f16(ptr %x, half %y) {
3085 ; ZVFH-LABEL: fdiv_vf_v6f16:
3086 ; ZVFH: # %bb.0:
3087 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3088 ; ZVFH-NEXT: vle16.v v8, (a0)
3089 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3090 ; ZVFH-NEXT: vfdiv.vf v8, v8, fa0
3091 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3092 ; ZVFH-NEXT: vse16.v v8, (a0)
3093 ; ZVFH-NEXT: ret
3094 ;
3095 ; ZVFHMIN-RV32-LABEL: fdiv_vf_v6f16:
3096 ; ZVFHMIN-RV32: # %bb.0:
3097 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3098 ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
3099 ; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
3100 ; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3101 ; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
3102 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3103 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
3104 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3105 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
3106 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
3107 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3108 ; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v9, v8
3109 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3110 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
3111 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3112 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
3113 ; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
3114 ; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
3115 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
3116 ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
3117 ; ZVFHMIN-RV32-NEXT: ret
3118 ;
3119 ; ZVFHMIN-RV64-LABEL: fdiv_vf_v6f16:
3120 ; ZVFHMIN-RV64: # %bb.0:
3121 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3122 ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
3123 ; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
3124 ; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3125 ; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
3126 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3127 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
3128 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3129 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
3130 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
3131 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3132 ; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v9, v8
3133 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3134 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
3135 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3136 ; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
3137 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
3138 ; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
3139 ; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
3140 ; ZVFHMIN-RV64-NEXT: ret
3141 %a = load <6 x half>, ptr %x
3142 %b = insertelement <6 x half> poison, half %y, i32 0
3143 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3144 %d = fdiv <6 x half> %a, %c
3145 store <6 x half> %d, ptr %x
3146 ret void
3147 }
3149 define void @fdiv_vf_v4f32(ptr %x, float %y) {
3150 ; ZVFH-LABEL: fdiv_vf_v4f32:
3151 ; ZVFH: # %bb.0:
3152 ; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3153 ; ZVFH-NEXT: vle32.v v8, (a0)
3154 ; ZVFH-NEXT: vfdiv.vf v8, v8, fa0
3155 ; ZVFH-NEXT: vse32.v v8, (a0)
3156 ; ZVFH-NEXT: ret
3157 ;
3158 ; ZVFHMIN-LABEL: fdiv_vf_v4f32:
3159 ; ZVFHMIN: # %bb.0:
3160 ; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
3161 ; ZVFHMIN-NEXT: vle32.v v8, (a0)
3162 ; ZVFHMIN-NEXT: vfdiv.vf v8, v8, fa0
3163 ; ZVFHMIN-NEXT: vse32.v v8, (a0)
3164 ; ZVFHMIN-NEXT: ret
3165 %a = load <4 x float>, ptr %x
3166 %b = insertelement <4 x float> poison, float %y, i32 0
3167 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3168 %d = fdiv <4 x float> %a, %c
3169 store <4 x float> %d, ptr %x
3170 ret void
3171 }
3173 define void @fdiv_vf_v2f64(ptr %x, double %y) {
3174 ; CHECK-LABEL: fdiv_vf_v2f64:
3175 ; CHECK: # %bb.0:
3176 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3177 ; CHECK-NEXT: vle64.v v8, (a0)
3178 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0
3179 ; CHECK-NEXT: vse64.v v8, (a0)
3180 ; CHECK-NEXT: ret
3181 %a = load <2 x double>, ptr %x
3182 %b = insertelement <2 x double> poison, double %y, i32 0
3183 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3184 %d = fdiv <2 x double> %a, %c
3185 store <2 x double> %d, ptr %x
3186 ret void
3187 }
3189 define void @fdiv_fv_v8f16(ptr %x, half %y) {
3190 ; ZVFH-LABEL: fdiv_fv_v8f16:
3191 ; ZVFH: # %bb.0:
3192 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3193 ; ZVFH-NEXT: vle16.v v8, (a0)
3194 ; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
3195 ; ZVFH-NEXT: vse16.v v8, (a0)
3196 ; ZVFH-NEXT: ret
3197 ;
3198 ; ZVFHMIN-LABEL: fdiv_fv_v8f16:
3199 ; ZVFHMIN: # %bb.0:
3200 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3201 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3202 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0
3203 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3204 ; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
3205 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3206 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
3207 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3208 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
3209 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3210 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3211 ; ZVFHMIN-NEXT: vfdiv.vv v8, v8, v9
3212 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3213 ; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
3214 ; ZVFHMIN-NEXT: vse16.v v9, (a0)
3215 ; ZVFHMIN-NEXT: ret
3216 %a = load <8 x half>, ptr %x
3217 %b = insertelement <8 x half> poison, half %y, i32 0
3218 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3219 %d = fdiv <8 x half> %c, %a
3220 store <8 x half> %d, ptr %x
3221 ret void
3222 }
3224 define void @fdiv_fv_v6f16(ptr %x, half %y) {
3225 ; ZVFH-LABEL: fdiv_fv_v6f16:
3226 ; ZVFH: # %bb.0:
3227 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3228 ; ZVFH-NEXT: vle16.v v8, (a0)
3229 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3230 ; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
3231 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3232 ; ZVFH-NEXT: vse16.v v8, (a0)
3233 ; ZVFH-NEXT: ret
3234 ;
3235 ; ZVFHMIN-RV32-LABEL: fdiv_fv_v6f16:
3236 ; ZVFHMIN-RV32: # %bb.0:
3237 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3238 ; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
3239 ; ZVFHMIN-RV32-NEXT: fcvt.s.h fa5, fa0
3240 ; ZVFHMIN-RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3241 ; ZVFHMIN-RV32-NEXT: vfmv.v.f v9, fa5
3242 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3243 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v10, v9
3244 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3245 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
3246 ; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v10
3247 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3248 ; ZVFHMIN-RV32-NEXT: vfdiv.vv v8, v8, v9
3249 ; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3250 ; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
3251 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3252 ; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
3253 ; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
3254 ; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
3255 ; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
3256 ; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
3257 ; ZVFHMIN-RV32-NEXT: ret
3259 ; ZVFHMIN-RV64-LABEL: fdiv_fv_v6f16:
3260 ; ZVFHMIN-RV64: # %bb.0:
3261 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3262 ; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
3263 ; ZVFHMIN-RV64-NEXT: fcvt.s.h fa5, fa0
3264 ; ZVFHMIN-RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
3265 ; ZVFHMIN-RV64-NEXT: vfmv.v.f v9, fa5
3266 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3267 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v10, v9
3268 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
3269 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
3270 ; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v10
3271 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
3272 ; ZVFHMIN-RV64-NEXT: vfdiv.vv v8, v8, v9
3273 ; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
3274 ; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
3275 ; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
3276 ; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
3277 ; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
3278 ; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
3279 ; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
3280 ; ZVFHMIN-RV64-NEXT: ret
3281 %a = load <6 x half>, ptr %x
3282 %b = insertelement <6 x half> poison, half %y, i32 0
3283 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3284 %d = fdiv <6 x half> %c, %a
3285 store <6 x half> %d, ptr %x

define void @fdiv_fv_v4f32(ptr %x, float %y) {
; ZVFH-LABEL: fdiv_fv_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vfrdiv.vf v8, v8, fa0
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fdiv_fv_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vfrdiv.vf v8, v8, fa0
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fdiv <4 x float> %c, %a
  store <4 x float> %d, ptr %x
  ret void
}

define void @fdiv_fv_v2f64(ptr %x, double %y) {
; CHECK-LABEL: fdiv_fv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = insertelement <2 x double> poison, double %y, i32 0
  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
  %d = fdiv <2 x double> %c, %a
  store <2 x double> %d, ptr %x
  ret void
}

define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
; ZVFH-LABEL: fma_vf_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vse16.v v9, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fma_vf_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vle16.v v9, (a1)
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = insertelement <8 x half> poison, half %z, i32 0
  %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
  %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %b)
  store <8 x half> %e, ptr %x
  ret void
}

define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-LABEL: fma_vf_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v9, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: fma_vf_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a1)
; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfmadd.vv v8, v9, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: fma_vf_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a1)
; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfmadd.vv v8, v9, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = insertelement <6 x half> poison, half %z, i32 0
  %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
  %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %b)
  store <6 x half> %e, ptr %x
  ret void
}

define void @fma_vf_v4f32(ptr %x, ptr %y, float %z) {
; ZVFH-LABEL: fma_vf_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vle32.v v9, (a1)
; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vse32.v v9, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fma_vf_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vfmacc.vf v9, fa0, v8
; ZVFHMIN-NEXT:    vse32.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = insertelement <4 x float> poison, float %z, i32 0
  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %d, <4 x float> %b)
  store <4 x float> %e, ptr %x
  ret void
}

define void @fma_vf_v2f64(ptr %x, ptr %y, double %z) {
; CHECK-LABEL: fma_vf_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
; CHECK-NEXT:    vse64.v v9, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = insertelement <2 x double> poison, double %z, i32 0
  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %d, <2 x double> %b)
  store <2 x double> %e, ptr %x
  ret void
}

define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
; ZVFH-LABEL: fma_fv_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vse16.v v9, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fma_fv_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vle16.v v9, (a1)
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmadd.vv v8, v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = insertelement <8 x half> poison, half %z, i32 0
  %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
  %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %d, <8 x half> %a, <8 x half> %b)
  store <8 x half> %e, ptr %x
  ret void
}

define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-LABEL: fma_fv_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v9, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: fma_fv_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a1)
; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfmadd.vv v8, v9, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: fma_fv_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a1)
; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfmadd.vv v8, v9, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = insertelement <6 x half> poison, half %z, i32 0
  %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
  %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %d, <6 x half> %a, <6 x half> %b)
  store <6 x half> %e, ptr %x
  ret void
}

define void @fma_fv_v4f32(ptr %x, ptr %y, float %z) {
; ZVFH-LABEL: fma_fv_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vle32.v v9, (a1)
; ZVFH-NEXT:    vfmacc.vf v9, fa0, v8
; ZVFH-NEXT:    vse32.v v9, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fma_fv_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vfmacc.vf v9, fa0, v8
; ZVFHMIN-NEXT:    vse32.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = insertelement <4 x float> poison, float %z, i32 0
  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %d, <4 x float> %a, <4 x float> %b)
  store <4 x float> %e, ptr %x
  ret void
}

define void @fma_fv_v2f64(ptr %x, ptr %y, double %z) {
; CHECK-LABEL: fma_fv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
; CHECK-NEXT:    vse64.v v9, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = insertelement <2 x double> poison, double %z, i32 0
  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %d, <2 x double> %a, <2 x double> %b)
  store <2 x double> %e, ptr %x
  ret void
}

define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
; ZVFH-LABEL: fmsub_vf_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfmsac.vf v9, fa0, v8
; ZVFH-NEXT:    vse16.v v9, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fmsub_vf_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vle16.v v9, (a1)
; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfneg.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmacc.vv v11, v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v11
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = insertelement <8 x half> poison, half %z, i32 0
  %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
  %neg = fneg <8 x half> %b
  %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %neg)
  store <8 x half> %e, ptr %x
  ret void
}

define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-LABEL: fmsub_vf_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfmsac.vf v9, fa0, v8
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v9, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: fmsub_vf_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    vle16.v v9, (a1)
; ZVFHMIN-RV32-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfneg.v v9, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v10, v9
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v11, v10
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfmacc.vv v11, v9, v8
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v11
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: fmsub_vf_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vle16.v v9, (a1)
; ZVFHMIN-RV64-NEXT:    fcvt.s.h fa5, fa0
; ZVFHMIN-RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfmv.v.f v10, fa5
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v11, v10
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v10, v9
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfneg.v v9, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v10, v9
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v8, v11
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v11, v10
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfmacc.vv v11, v9, v8
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v11
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = insertelement <6 x half> poison, half %z, i32 0
  %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
  %neg = fneg <6 x half> %b
  %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %neg)
  store <6 x half> %e, ptr %x
  ret void
}

define void @fnmsub_vf_v4f32(ptr %x, ptr %y, float %z) {
; ZVFH-LABEL: fnmsub_vf_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vle32.v v9, (a1)
; ZVFH-NEXT:    vfnmsac.vf v9, fa0, v8
; ZVFH-NEXT:    vse32.v v9, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fnmsub_vf_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vfnmsac.vf v9, fa0, v8
; ZVFHMIN-NEXT:    vse32.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = insertelement <4 x float> poison, float %z, i32 0
  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
  %neg = fneg <4 x float> %a
  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %d, <4 x float> %b)
  store <4 x float> %e, ptr %x
  ret void
}

define void @fnmadd_vf_v2f64(ptr %x, ptr %y, double %z) {
; CHECK-LABEL: fnmadd_vf_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
; CHECK-NEXT:    vse64.v v9, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = insertelement <2 x double> poison, double %z, i32 0
  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
  %neg = fneg <2 x double> %a
  %neg2 = fneg <2 x double> %b
  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %d, <2 x double> %neg2)
  store <2 x double> %e, ptr %x
  ret void
}

define void @fnmsub_fv_v4f32(ptr %x, ptr %y, float %z) {
; ZVFH-LABEL: fnmsub_fv_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vle32.v v9, (a1)
; ZVFH-NEXT:    vfnmsac.vf v9, fa0, v8
; ZVFH-NEXT:    vse32.v v9, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fnmsub_fv_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vle32.v v9, (a1)
; ZVFHMIN-NEXT:    vfnmsac.vf v9, fa0, v8
; ZVFHMIN-NEXT:    vse32.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = insertelement <4 x float> poison, float %z, i32 0
  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
  %neg = fneg <4 x float> %d
  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %a, <4 x float> %b)
  store <4 x float> %e, ptr %x
  ret void
}

define void @fnmadd_fv_v2f64(ptr %x, ptr %y, double %z) {
; CHECK-LABEL: fnmadd_fv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfnmacc.vf v9, fa0, v8
; CHECK-NEXT:    vse64.v v9, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = insertelement <2 x double> poison, double %z, i32 0
  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
  %neg = fneg <2 x double> %d
  %neg2 = fneg <2 x double> %b
  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %a, <2 x double> %neg2)
  store <2 x double> %e, ptr %x
  ret void
}

define void @trunc_v8f16(ptr %x) {
; ZVFH-LABEL: trunc_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    lui a1, %hi(.LCPI115_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI115_0)(a1)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
  store <8 x half> %b, ptr %x
  ret void
}
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)

define void @trunc_v6f16(ptr %x) {
; ZVFH-LABEL: trunc_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    lui a1, %hi(.LCPI116_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI116_0)(a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
  store <6 x half> %b, ptr %x
  ret void
}
declare <6 x half> @llvm.trunc.v6f16(<6 x half>)

define void @trunc_v4f32(ptr %x) {
; ZVFH-LABEL: trunc_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    lui a1, 307200
; ZVFH-NEXT:    fmv.w.x fa5, a1
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: trunc_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vfabs.v v9, v8
; ZVFHMIN-NEXT:    lui a1, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-NEXT:    vmflt.vf v0, v9, fa5
; ZVFHMIN-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)

define void @trunc_v2f64(ptr %x) {
; CHECK-LABEL: trunc_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, %hi(.LCPI118_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI118_0)(a1)
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    vfcvt.rtz.x.f.v v9, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)

define void @ceil_v8f16(ptr %x) {
; ZVFH-LABEL: ceil_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    lui a1, %hi(.LCPI119_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI119_0)(a1)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a1, 3
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a1
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: ceil_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    lui a1, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a1, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a1
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
  store <8 x half> %b, ptr %x
  ret void
}
declare <8 x half> @llvm.ceil.v8f16(<8 x half>)

define void @ceil_v6f16(ptr %x) {
; ZVFH-LABEL: ceil_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    lui a1, %hi(.LCPI120_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI120_0)(a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a1, 3
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a1
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: ceil_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v9
; ZVFHMIN-RV32-NEXT:    lui a1, 307200
; ZVFHMIN-RV32-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-RV32-NEXT:    fsrmi a1, 3
; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-RV32-NEXT:    fsrm a1
; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: ceil_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v9
; ZVFHMIN-RV64-NEXT:    lui a1, 307200
; ZVFHMIN-RV64-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-RV64-NEXT:    fsrmi a1, 3
; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-RV64-NEXT:    fsrm a1
; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
  store <6 x half> %b, ptr %x
  ret void
}
declare <6 x half> @llvm.ceil.v6f16(<6 x half>)

define void @ceil_v4f32(ptr %x) {
; ZVFH-LABEL: ceil_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    lui a1, 307200
; ZVFH-NEXT:    fmv.w.x fa5, a1
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a1, 3
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a1
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: ceil_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vfabs.v v9, v8
; ZVFHMIN-NEXT:    lui a1, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-NEXT:    vmflt.vf v0, v9, fa5
; ZVFHMIN-NEXT:    fsrmi a1, 3
; ZVFHMIN-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    fsrm a1
; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)

define void @ceil_v2f64(ptr %x) {
; CHECK-LABEL: ceil_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, %hi(.LCPI122_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI122_0)(a1)
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a1, 3
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}
declare <2 x double> @llvm.ceil.v2f64(<2 x double>)

define void @floor_v8f16(ptr %x) {
; ZVFH-LABEL: floor_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    lui a1, %hi(.LCPI123_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI123_0)(a1)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a1, 2
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a1
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: floor_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    lui a1, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a1, 2
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a1
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
  store <8 x half> %b, ptr %x
  ret void
}
declare <8 x half> @llvm.floor.v8f16(<8 x half>)

define void @floor_v6f16(ptr %x) {
; ZVFH-LABEL: floor_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    lui a1, %hi(.LCPI124_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI124_0)(a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a1, 2
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a1
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: floor_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v9
; ZVFHMIN-RV32-NEXT:    lui a1, 307200
; ZVFHMIN-RV32-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-RV32-NEXT:    fsrmi a1, 2
; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-RV32-NEXT:    fsrm a1
; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: floor_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v9
; ZVFHMIN-RV64-NEXT:    lui a1, 307200
; ZVFHMIN-RV64-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-RV64-NEXT:    fsrmi a1, 2
; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-RV64-NEXT:    fsrm a1
; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
  store <6 x half> %b, ptr %x
  ret void
}
declare <6 x half> @llvm.floor.v6f16(<6 x half>)

define void @floor_v4f32(ptr %x) {
; ZVFH-LABEL: floor_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    lui a1, 307200
; ZVFH-NEXT:    fmv.w.x fa5, a1
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a1, 2
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a1
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: floor_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vfabs.v v9, v8
; ZVFHMIN-NEXT:    lui a1, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-NEXT:    vmflt.vf v0, v9, fa5
; ZVFHMIN-NEXT:    fsrmi a1, 2
; ZVFHMIN-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    fsrm a1
; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}
declare <4 x float> @llvm.floor.v4f32(<4 x float>)

define void @floor_v2f64(ptr %x) {
; CHECK-LABEL: floor_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, %hi(.LCPI126_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI126_0)(a1)
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a1, 2
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}
declare <2 x double> @llvm.floor.v2f64(<2 x double>)

define void @round_v8f16(ptr %x) {
; ZVFH-LABEL: round_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    lui a1, %hi(.LCPI127_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI127_0)(a1)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a1, 4
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a1
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    lui a1, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    fsrmi a1, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    fsrm a1
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = call <8 x half> @llvm.round.v8f16(<8 x half> %a)
  store <8 x half> %b, ptr %x
  ret void
}
declare <8 x half> @llvm.round.v8f16(<8 x half>)

define void @round_v6f16(ptr %x) {
; ZVFH-LABEL: round_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    lui a1, %hi(.LCPI128_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI128_0)(a1)
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a1, 4
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a1
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-RV32-LABEL: round_v6f16:
; ZVFHMIN-RV32:       # %bb.0:
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT:    vfabs.v v8, v9
; ZVFHMIN-RV32-NEXT:    lui a1, 307200
; ZVFHMIN-RV32-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-RV32-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-RV32-NEXT:    fsrmi a1, 4
; ZVFHMIN-RV32-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-RV32-NEXT:    fsrm a1
; ZVFHMIN-RV32-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-RV32-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT:    vslidedown.vi v9, v8, 2
; ZVFHMIN-RV32-NEXT:    addi a1, a0, 8
; ZVFHMIN-RV32-NEXT:    vse32.v v9, (a1)
; ZVFHMIN-RV32-NEXT:    vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-RV32-NEXT:    ret
;
; ZVFHMIN-RV64-LABEL: round_v6f16:
; ZVFHMIN-RV64:       # %bb.0:
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT:    vfabs.v v8, v9
; ZVFHMIN-RV64-NEXT:    lui a1, 307200
; ZVFHMIN-RV64-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-RV64-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-RV64-NEXT:    fsrmi a1, 4
; ZVFHMIN-RV64-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-RV64-NEXT:    fsrm a1
; ZVFHMIN-RV64-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-RV64-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT:    vse64.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    vslidedown.vi v8, v8, 2
; ZVFHMIN-RV64-NEXT:    addi a0, a0, 8
; ZVFHMIN-RV64-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
  store <6 x half> %b, ptr %x
  ret void
}
declare <6 x half> @llvm.round.v6f16(<6 x half>)

define void @round_v4f32(ptr %x) {
; ZVFH-LABEL: round_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    lui a1, 307200
; ZVFH-NEXT:    fmv.w.x fa5, a1
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    fsrmi a1, 4
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    fsrm a1
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: round_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vfabs.v v9, v8
; ZVFHMIN-NEXT:    lui a1, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-NEXT:    vmflt.vf v0, v9, fa5
; ZVFHMIN-NEXT:    fsrmi a1, 4
; ZVFHMIN-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    fsrm a1
; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.round.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}
declare <4 x float> @llvm.round.v4f32(<4 x float>)

define void @round_v2f64(ptr %x) {
; CHECK-LABEL: round_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, %hi(.LCPI130_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI130_0)(a1)
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    fsrmi a1, 4
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    fsrm a1
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}
declare <2 x double> @llvm.round.v2f64(<2 x double>)

define void @rint_v8f16(ptr %x) {
; ZVFH-LABEL: rint_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    lui a1, %hi(.LCPI131_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI131_0)(a1)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: rint_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    lui a1, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
  store <8 x half> %b, ptr %x
  ret void
}
declare <8 x half> @llvm.rint.v8f16(<8 x half>)

define void @rint_v4f32(ptr %x) {
; ZVFH-LABEL: rint_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    lui a1, 307200
; ZVFH-NEXT:    fmv.w.x fa5, a1
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: rint_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vfabs.v v9, v8
; ZVFHMIN-NEXT:    lui a1, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-NEXT:    vmflt.vf v0, v9, fa5
; ZVFHMIN-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.rint.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}
declare <4 x float> @llvm.rint.v4f32(<4 x float>)

define void @rint_v2f64(ptr %x) {
; CHECK-LABEL: rint_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, %hi(.LCPI133_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI133_0)(a1)
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.rint.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}
declare <2 x double> @llvm.rint.v2f64(<2 x double>)

define void @nearbyint_v8f16(ptr %x) {
; ZVFH-LABEL: nearbyint_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    lui a1, %hi(.LCPI134_0)
; ZVFH-NEXT:    flh fa5, %lo(.LCPI134_0)(a1)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    frflags a1
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    fsflags a1
; ZVFH-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: nearbyint_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v8
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfabs.v v8, v9
; ZVFHMIN-NEXT:    lui a1, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-NEXT:    vmflt.vf v0, v8, fa5
; ZVFHMIN-NEXT:    frflags a1
; ZVFHMIN-NEXT:    vfcvt.x.f.v v8, v9, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v8, v8, v0.t
; ZVFHMIN-NEXT:    fsflags a1
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v9, v8, v9, v0.t
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
  store <8 x half> %b, ptr %x
  ret void
}
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)

define void @nearbyint_v4f32(ptr %x) {
; ZVFH-LABEL: nearbyint_v4f32:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT:    vle32.v v8, (a0)
; ZVFH-NEXT:    vfabs.v v9, v8
; ZVFH-NEXT:    lui a1, 307200
; ZVFH-NEXT:    fmv.w.x fa5, a1
; ZVFH-NEXT:    vmflt.vf v0, v9, fa5
; ZVFH-NEXT:    frflags a1
; ZVFH-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFH-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFH-NEXT:    fsflags a1
; ZVFH-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
; ZVFH-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFH-NEXT:    vse32.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: nearbyint_v4f32:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT:    vle32.v v8, (a0)
; ZVFHMIN-NEXT:    vfabs.v v9, v8
; ZVFHMIN-NEXT:    lui a1, 307200
; ZVFHMIN-NEXT:    fmv.w.x fa5, a1
; ZVFHMIN-NEXT:    vmflt.vf v0, v9, fa5
; ZVFHMIN-NEXT:    frflags a1
; ZVFHMIN-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; ZVFHMIN-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; ZVFHMIN-NEXT:    fsflags a1
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
; ZVFHMIN-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; ZVFHMIN-NEXT:    vse32.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)

define void @nearbyint_v2f64(ptr %x) {
; CHECK-LABEL: nearbyint_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    lui a1, %hi(.LCPI136_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI136_0)(a1)
; CHECK-NEXT:    vfabs.v v9, v8
; CHECK-NEXT:    vmflt.vf v0, v9, fa5
; CHECK-NEXT:    frflags a1
; CHECK-NEXT:    vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT:    vfcvt.f.x.v v9, v9, v0.t
; CHECK-NEXT:    fsflags a1
; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v9, v8, v0.t
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)

define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fmuladd_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vle16.v v10, (a2)
; ZVFH-NEXT:    vfmacc.vv v10, v8, v9
; ZVFH-NEXT:    vse16.v v10, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fmuladd_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vle16.v v10, (a2)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfmul.vv v8, v8, v11
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT:    vse16.v v9, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = load <8 x half>, ptr %z
  %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
  store <8 x half> %d, ptr %x
  ret void
}
declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>)
define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fmuladd_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vle16.v v10, (a2)
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmacc.vv v10, v8, v9
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-RV32-LABEL: fmuladd_v6f16:
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
; ZVFHMIN-RV32-NEXT: vle16.v v10, (a2)
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v8, v11
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT: vfadd.vv v8, v8, v9
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT: ret
;
; ZVFHMIN-RV64-LABEL: fmuladd_v6f16:
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
; ZVFHMIN-RV64-NEXT: vle16.v v10, (a2)
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v8, v11
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT: vfadd.vv v8, v8, v9
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = load <6 x half>, ptr %z
%d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
store <6 x half> %d, ptr %x
ret void
}

declare <6 x half> @llvm.fmuladd.v6f16(<6 x half>, <6 x half>, <6 x half>)

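; f32 and f64 vectors have a native vector FMA under +v, so llvm.fmuladd
; collapses to a single vfmacc.vv on every run line.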
define void @fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fmuladd_v4f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT: vle32.v v8, (a0)
; ZVFH-NEXT: vle32.v v9, (a1)
; ZVFH-NEXT: vle32.v v10, (a2)
; ZVFH-NEXT: vfmacc.vv v10, v8, v9
; ZVFH-NEXT: vse32.v v10, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fmuladd_v4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vle32.v v8, (a0)
; ZVFHMIN-NEXT: vle32.v v9, (a1)
; ZVFHMIN-NEXT: vle32.v v10, (a2)
; ZVFHMIN-NEXT: vfmacc.vv v10, v8, v9
; ZVFHMIN-NEXT: vse32.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <4 x float>, ptr %x
%b = load <4 x float>, ptr %y
%c = load <4 x float>, ptr %z
%d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
store <4 x float> %d, ptr %x
ret void
}

declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)

define void @fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fmuladd_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v9, (a1)
; CHECK-NEXT: vle64.v v10, (a2)
; CHECK-NEXT: vfmacc.vv v10, v8, v9
; CHECK-NEXT: vse64.v v10, (a0)
; CHECK-NEXT: ret
%a = load <2 x double>, ptr %x
%b = load <2 x double>, ptr %y
%c = load <2 x double>, ptr %z
%d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
store <2 x double> %d, ptr %x
ret void
}

declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)

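; A negated addend turns fmuladd into fmsub: with zvfh this selects
; vfmsac.vv, while the zvfhmin lowering folds the fneg into the final vfsub.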
define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fmsub_fmuladd_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vle16.v v10, (a2)
; ZVFH-NEXT: vfmsac.vv v10, v8, v9
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fmsub_fmuladd_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: vle16.v v10, (a2)
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v8, v11
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-NEXT: vfsub.vv v8, v8, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-NEXT: vse16.v v9, (a0)
; ZVFHMIN-NEXT: ret
%a = load <8 x half>, ptr %x
%b = load <8 x half>, ptr %y
%c = load <8 x half>, ptr %z
%neg = fneg <8 x half> %c
%d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
store <8 x half> %d, ptr %x
ret void
}

define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fmsub_fmuladd_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vle16.v v10, (a2)
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vfmsac.vv v10, v8, v9
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vse16.v v10, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-RV32-LABEL: fmsub_fmuladd_v6f16:
; ZVFHMIN-RV32: # %bb.0:
; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vle16.v v8, (a1)
; ZVFHMIN-RV32-NEXT: vle16.v v9, (a0)
; ZVFHMIN-RV32-NEXT: vle16.v v10, (a2)
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT: vfmul.vv v8, v8, v11
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV32-NEXT: vfsub.vv v8, v8, v9
; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
; ZVFHMIN-RV32-NEXT: ret
;
; ZVFHMIN-RV64-LABEL: fmsub_fmuladd_v6f16:
; ZVFHMIN-RV64: # %bb.0:
; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vle16.v v8, (a1)
; ZVFHMIN-RV64-NEXT: vle16.v v9, (a0)
; ZVFHMIN-RV64-NEXT: vle16.v v10, (a2)
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v11, v8
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT: vfmul.vv v8, v8, v11
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v8, v9
; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v10
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; ZVFHMIN-RV64-NEXT: vfsub.vv v8, v8, v9
; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
; ZVFHMIN-RV64-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = load <6 x half>, ptr %z
%neg = fneg <6 x half> %c
%d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
store <6 x half> %d, ptr %x
ret void
}

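; Negating the first multiplicand gives fnmsub, -(a*b)+c, selected as
; vfnmsac.vv.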
define void @fnmsub_fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
; ZVFH-LABEL: fnmsub_fmuladd_v4f32:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; ZVFH-NEXT: vle32.v v8, (a0)
; ZVFH-NEXT: vle32.v v9, (a1)
; ZVFH-NEXT: vle32.v v10, (a2)
; ZVFH-NEXT: vfnmsac.vv v10, v8, v9
; ZVFH-NEXT: vse32.v v10, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fnmsub_fmuladd_v4f32:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; ZVFHMIN-NEXT: vle32.v v8, (a0)
; ZVFHMIN-NEXT: vle32.v v9, (a1)
; ZVFHMIN-NEXT: vle32.v v10, (a2)
; ZVFHMIN-NEXT: vfnmsac.vv v10, v8, v9
; ZVFHMIN-NEXT: vse32.v v10, (a0)
; ZVFHMIN-NEXT: ret
%a = load <4 x float>, ptr %x
%b = load <4 x float>, ptr %y
%c = load <4 x float>, ptr %z
%neg = fneg <4 x float> %a
%d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
store <4 x float> %d, ptr %x
ret void
}

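; Negating both a multiplicand and the addend gives fnmadd, -(a*b)-c,
; selected as vfnmacc.vv.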
define void @fnmadd_fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
; CHECK-LABEL: fnmadd_fmuladd_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v9, (a1)
; CHECK-NEXT: vle64.v v10, (a2)
; CHECK-NEXT: vfnmacc.vv v10, v8, v9
; CHECK-NEXT: vse64.v v10, (a0)
; CHECK-NEXT: ret
%a = load <2 x double>, ptr %x
%b = load <2 x double>, ptr %y
%c = load <2 x double>, ptr %z
%neg = fneg <2 x double> %b
%neg2 = fneg <2 x double> %c
%d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
store <2 x double> %d, ptr %x