1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
3 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
4 ; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
5 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; Element-wise fadd of two <8 x bfloat> vectors loaded from %x and %y; the sum
; is stored back through %x. The checks shared by every RUN line expect both
; operands to be widened with vfwcvtbf16.f.f.v, added at e32/m2, and the result
; narrowed back to 16 bits with vfncvtbf16.f.f.w.
8 define void @fadd_v8bf16(ptr %x, ptr %y) {
9 ; CHECK-LABEL: fadd_v8bf16:
11 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
12 ; CHECK-NEXT: vle16.v v8, (a1)
13 ; CHECK-NEXT: vle16.v v9, (a0)
14 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
15 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
16 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
17 ; CHECK-NEXT: vfadd.vv v8, v12, v10
18 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
19 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
20 ; CHECK-NEXT: vse16.v v10, (a0)
22 %a = load <8 x bfloat>, ptr %x
23 %b = load <8 x bfloat>, ptr %y
24 %c = fadd <8 x bfloat> %a, %b
25 store <8 x bfloat> %c, ptr %x
; Element-wise fadd of two <6 x bfloat> vectors (a non-power-of-two element
; count) loaded from %x and %y; the sum is stored back through %x. The shared
; checks expect an AVL of 6 in vsetivli, widening with vfwcvtbf16.f.f.v, the add
; at e32/m2, and narrowing with vfncvtbf16.f.f.w.
29 define void @fadd_v6bf16(ptr %x, ptr %y) {
30 ; CHECK-LABEL: fadd_v6bf16:
32 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
33 ; CHECK-NEXT: vle16.v v8, (a1)
34 ; CHECK-NEXT: vle16.v v9, (a0)
35 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
36 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
37 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
38 ; CHECK-NEXT: vfadd.vv v8, v12, v10
39 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
40 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
41 ; CHECK-NEXT: vse16.v v10, (a0)
43 %a = load <6 x bfloat>, ptr %x
44 %b = load <6 x bfloat>, ptr %y
45 %c = fadd <6 x bfloat> %a, %b
46 store <6 x bfloat> %c, ptr %x
; Element-wise fadd of two <8 x half> vectors loaded from %x and %y; the sum is
; stored back through %x. The +zvfh runs expect a single vfadd.vv at e16, while
; the +zvfhmin runs expect the operands to be widened with vfwcvt.f.f.v, added
; at e32/m2, and narrowed again with vfncvt.f.f.w.
50 define void @fadd_v8f16(ptr %x, ptr %y) {
51 ; ZVFH-LABEL: fadd_v8f16:
53 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
54 ; ZVFH-NEXT: vle16.v v8, (a0)
55 ; ZVFH-NEXT: vle16.v v9, (a1)
56 ; ZVFH-NEXT: vfadd.vv v8, v8, v9
57 ; ZVFH-NEXT: vse16.v v8, (a0)
60 ; ZVFHMIN-LABEL: fadd_v8f16:
62 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
63 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
64 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
65 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
66 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
67 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
68 ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10
69 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
70 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
71 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
73 %a = load <8 x half>, ptr %x
74 %b = load <8 x half>, ptr %y
75 %c = fadd <8 x half> %a, %b
76 store <8 x half> %c, ptr %x
; Element-wise fadd of two <6 x half> vectors (a non-power-of-two element
; count) loaded from %x and %y; the sum is stored back through %x. The +zvfh
; runs expect a single e16 vfadd.vv with an AVL of 6; the +zvfhmin runs expect
; widening with vfwcvt.f.f.v, the add at e32/m2, and narrowing with vfncvt.f.f.w.
80 define void @fadd_v6f16(ptr %x, ptr %y) {
81 ; ZVFH-LABEL: fadd_v6f16:
83 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
84 ; ZVFH-NEXT: vle16.v v8, (a0)
85 ; ZVFH-NEXT: vle16.v v9, (a1)
86 ; ZVFH-NEXT: vfadd.vv v8, v8, v9
87 ; ZVFH-NEXT: vse16.v v8, (a0)
90 ; ZVFHMIN-LABEL: fadd_v6f16:
92 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
93 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
94 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
95 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
96 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
97 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
98 ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10
99 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
100 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
101 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
103 %a = load <6 x half>, ptr %x
104 %b = load <6 x half>, ptr %y
105 %c = fadd <6 x half> %a, %b
106 store <6 x half> %c, ptr %x
; Element-wise fadd of two <4 x float> vectors loaded from %x and %y; the sum
; is stored back through %x. All RUN lines share one expectation: a direct
; vfadd.vv at e32/m1 with no conversions.
110 define void @fadd_v4f32(ptr %x, ptr %y) {
111 ; CHECK-LABEL: fadd_v4f32:
113 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
114 ; CHECK-NEXT: vle32.v v8, (a0)
115 ; CHECK-NEXT: vle32.v v9, (a1)
116 ; CHECK-NEXT: vfadd.vv v8, v8, v9
117 ; CHECK-NEXT: vse32.v v8, (a0)
119 %a = load <4 x float>, ptr %x
120 %b = load <4 x float>, ptr %y
121 %c = fadd <4 x float> %a, %b
122 store <4 x float> %c, ptr %x
; Element-wise fadd of two <2 x double> vectors loaded from %x and %y; the sum
; is stored back through %x. All RUN lines share one expectation: a direct
; vfadd.vv at e64/m1 with no conversions.
126 define void @fadd_v2f64(ptr %x, ptr %y) {
127 ; CHECK-LABEL: fadd_v2f64:
129 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
130 ; CHECK-NEXT: vle64.v v8, (a0)
131 ; CHECK-NEXT: vle64.v v9, (a1)
132 ; CHECK-NEXT: vfadd.vv v8, v8, v9
133 ; CHECK-NEXT: vse64.v v8, (a0)
135 %a = load <2 x double>, ptr %x
136 %b = load <2 x double>, ptr %y
137 %c = fadd <2 x double> %a, %b
138 store <2 x double> %c, ptr %x
; Element-wise fsub (%x minus %y) of two <8 x bfloat> vectors; the difference is
; stored back through %x. The shared checks expect both operands to be widened
; with vfwcvtbf16.f.f.v, subtracted at e32/m2 (the widened %x value is the first
; source operand), and the result narrowed with vfncvtbf16.f.f.w.
142 define void @fsub_v8bf16(ptr %x, ptr %y) {
143 ; CHECK-LABEL: fsub_v8bf16:
145 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
146 ; CHECK-NEXT: vle16.v v8, (a1)
147 ; CHECK-NEXT: vle16.v v9, (a0)
148 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
149 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
150 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
151 ; CHECK-NEXT: vfsub.vv v8, v12, v10
152 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
153 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
154 ; CHECK-NEXT: vse16.v v10, (a0)
156 %a = load <8 x bfloat>, ptr %x
157 %b = load <8 x bfloat>, ptr %y
158 %c = fsub <8 x bfloat> %a, %b
159 store <8 x bfloat> %c, ptr %x
; Element-wise fsub (%x minus %y) of two <6 x bfloat> vectors (a
; non-power-of-two element count); the difference is stored back through %x.
; The shared checks expect an AVL of 6, widening with vfwcvtbf16.f.f.v, the
; subtract at e32/m2, and narrowing with vfncvtbf16.f.f.w.
163 define void @fsub_v6bf16(ptr %x, ptr %y) {
164 ; CHECK-LABEL: fsub_v6bf16:
166 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
167 ; CHECK-NEXT: vle16.v v8, (a1)
168 ; CHECK-NEXT: vle16.v v9, (a0)
169 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
170 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
171 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
172 ; CHECK-NEXT: vfsub.vv v8, v12, v10
173 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
174 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
175 ; CHECK-NEXT: vse16.v v10, (a0)
177 %a = load <6 x bfloat>, ptr %x
178 %b = load <6 x bfloat>, ptr %y
179 %c = fsub <6 x bfloat> %a, %b
180 store <6 x bfloat> %c, ptr %x
; Element-wise fsub (%x minus %y) of two <8 x half> vectors; the difference is
; stored back through %x. The +zvfh runs expect a single vfsub.vv at e16, while
; the +zvfhmin runs expect widening with vfwcvt.f.f.v, the subtract at e32/m2,
; and narrowing with vfncvt.f.f.w.
184 define void @fsub_v8f16(ptr %x, ptr %y) {
185 ; ZVFH-LABEL: fsub_v8f16:
187 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
188 ; ZVFH-NEXT: vle16.v v8, (a0)
189 ; ZVFH-NEXT: vle16.v v9, (a1)
190 ; ZVFH-NEXT: vfsub.vv v8, v8, v9
191 ; ZVFH-NEXT: vse16.v v8, (a0)
194 ; ZVFHMIN-LABEL: fsub_v8f16:
196 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
197 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
198 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
199 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
200 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
201 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
202 ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
203 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
204 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
205 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
207 %a = load <8 x half>, ptr %x
208 %b = load <8 x half>, ptr %y
209 %c = fsub <8 x half> %a, %b
210 store <8 x half> %c, ptr %x
; Element-wise fsub (%x minus %y) of two <6 x half> vectors (a non-power-of-two
; element count); the difference is stored back through %x. The +zvfh runs
; expect a single e16 vfsub.vv with an AVL of 6; the +zvfhmin runs expect
; widening with vfwcvt.f.f.v, the subtract at e32/m2, and narrowing with
; vfncvt.f.f.w.
214 define void @fsub_v6f16(ptr %x, ptr %y) {
215 ; ZVFH-LABEL: fsub_v6f16:
217 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
218 ; ZVFH-NEXT: vle16.v v8, (a0)
219 ; ZVFH-NEXT: vle16.v v9, (a1)
220 ; ZVFH-NEXT: vfsub.vv v8, v8, v9
221 ; ZVFH-NEXT: vse16.v v8, (a0)
224 ; ZVFHMIN-LABEL: fsub_v6f16:
226 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
227 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
228 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
229 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
230 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
231 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
232 ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
233 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
234 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
235 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
237 %a = load <6 x half>, ptr %x
238 %b = load <6 x half>, ptr %y
239 %c = fsub <6 x half> %a, %b
240 store <6 x half> %c, ptr %x
; Element-wise fsub (%x minus %y) of two <4 x float> vectors; the difference is
; stored back through %x. All RUN lines share one expectation: a direct
; vfsub.vv at e32/m1 with no conversions.
244 define void @fsub_v4f32(ptr %x, ptr %y) {
245 ; CHECK-LABEL: fsub_v4f32:
247 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
248 ; CHECK-NEXT: vle32.v v8, (a0)
249 ; CHECK-NEXT: vle32.v v9, (a1)
250 ; CHECK-NEXT: vfsub.vv v8, v8, v9
251 ; CHECK-NEXT: vse32.v v8, (a0)
253 %a = load <4 x float>, ptr %x
254 %b = load <4 x float>, ptr %y
255 %c = fsub <4 x float> %a, %b
256 store <4 x float> %c, ptr %x
; Element-wise fsub (%x minus %y) of two <2 x double> vectors; the difference
; is stored back through %x. All RUN lines share one expectation: a direct
; vfsub.vv at e64/m1 with no conversions.
260 define void @fsub_v2f64(ptr %x, ptr %y) {
261 ; CHECK-LABEL: fsub_v2f64:
263 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
264 ; CHECK-NEXT: vle64.v v8, (a0)
265 ; CHECK-NEXT: vle64.v v9, (a1)
266 ; CHECK-NEXT: vfsub.vv v8, v8, v9
267 ; CHECK-NEXT: vse64.v v8, (a0)
269 %a = load <2 x double>, ptr %x
270 %b = load <2 x double>, ptr %y
271 %c = fsub <2 x double> %a, %b
272 store <2 x double> %c, ptr %x
; Element-wise fmul of two <8 x bfloat> vectors loaded from %x and %y; the
; product is stored back through %x. The shared checks expect both operands to
; be widened with vfwcvtbf16.f.f.v, multiplied at e32/m2, and the result
; narrowed with vfncvtbf16.f.f.w.
276 define void @fmul_v8bf16(ptr %x, ptr %y) {
277 ; CHECK-LABEL: fmul_v8bf16:
279 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
280 ; CHECK-NEXT: vle16.v v8, (a1)
281 ; CHECK-NEXT: vle16.v v9, (a0)
282 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
283 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
284 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
285 ; CHECK-NEXT: vfmul.vv v8, v12, v10
286 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
287 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
288 ; CHECK-NEXT: vse16.v v10, (a0)
290 %a = load <8 x bfloat>, ptr %x
291 %b = load <8 x bfloat>, ptr %y
292 %c = fmul <8 x bfloat> %a, %b
293 store <8 x bfloat> %c, ptr %x
; Element-wise fmul of two <6 x bfloat> vectors (a non-power-of-two element
; count) loaded from %x and %y; the product is stored back through %x. The
; shared checks expect an AVL of 6, widening with vfwcvtbf16.f.f.v, the multiply
; at e32/m2, and narrowing with vfncvtbf16.f.f.w.
297 define void @fmul_v6bf16(ptr %x, ptr %y) {
298 ; CHECK-LABEL: fmul_v6bf16:
300 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
301 ; CHECK-NEXT: vle16.v v8, (a1)
302 ; CHECK-NEXT: vle16.v v9, (a0)
303 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
304 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
305 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
306 ; CHECK-NEXT: vfmul.vv v8, v12, v10
307 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
308 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
309 ; CHECK-NEXT: vse16.v v10, (a0)
311 %a = load <6 x bfloat>, ptr %x
312 %b = load <6 x bfloat>, ptr %y
313 %c = fmul <6 x bfloat> %a, %b
314 store <6 x bfloat> %c, ptr %x
; Element-wise fmul of two <8 x half> vectors loaded from %x and %y; the
; product is stored back through %x. The +zvfh runs expect a single vfmul.vv at
; e16, while the +zvfhmin runs expect widening with vfwcvt.f.f.v, the multiply
; at e32/m2, and narrowing with vfncvt.f.f.w.
318 define void @fmul_v8f16(ptr %x, ptr %y) {
319 ; ZVFH-LABEL: fmul_v8f16:
321 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
322 ; ZVFH-NEXT: vle16.v v8, (a0)
323 ; ZVFH-NEXT: vle16.v v9, (a1)
324 ; ZVFH-NEXT: vfmul.vv v8, v8, v9
325 ; ZVFH-NEXT: vse16.v v8, (a0)
328 ; ZVFHMIN-LABEL: fmul_v8f16:
330 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
331 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
332 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
333 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
334 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
335 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
336 ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
337 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
338 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
339 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
341 %a = load <8 x half>, ptr %x
342 %b = load <8 x half>, ptr %y
343 %c = fmul <8 x half> %a, %b
344 store <8 x half> %c, ptr %x
; Element-wise fmul of two <6 x half> vectors (a non-power-of-two element
; count) loaded from %x and %y; the product is stored back through %x. The
; +zvfh runs expect a single e16 vfmul.vv with an AVL of 6; the +zvfhmin runs
; expect widening with vfwcvt.f.f.v, the multiply at e32/m2, and narrowing with
; vfncvt.f.f.w.
348 define void @fmul_v6f16(ptr %x, ptr %y) {
349 ; ZVFH-LABEL: fmul_v6f16:
351 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
352 ; ZVFH-NEXT: vle16.v v8, (a0)
353 ; ZVFH-NEXT: vle16.v v9, (a1)
354 ; ZVFH-NEXT: vfmul.vv v8, v8, v9
355 ; ZVFH-NEXT: vse16.v v8, (a0)
358 ; ZVFHMIN-LABEL: fmul_v6f16:
360 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
361 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
362 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
363 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
364 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
365 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
366 ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
367 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
368 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
369 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
371 %a = load <6 x half>, ptr %x
372 %b = load <6 x half>, ptr %y
373 %c = fmul <6 x half> %a, %b
374 store <6 x half> %c, ptr %x
; Element-wise fmul of two <4 x float> vectors loaded from %x and %y; the
; product is stored back through %x. All RUN lines share one expectation: a
; direct vfmul.vv at e32/m1 with no conversions.
378 define void @fmul_v4f32(ptr %x, ptr %y) {
379 ; CHECK-LABEL: fmul_v4f32:
381 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
382 ; CHECK-NEXT: vle32.v v8, (a0)
383 ; CHECK-NEXT: vle32.v v9, (a1)
384 ; CHECK-NEXT: vfmul.vv v8, v8, v9
385 ; CHECK-NEXT: vse32.v v8, (a0)
387 %a = load <4 x float>, ptr %x
388 %b = load <4 x float>, ptr %y
389 %c = fmul <4 x float> %a, %b
390 store <4 x float> %c, ptr %x
; Element-wise fmul of two <2 x double> vectors loaded from %x and %y; the
; product is stored back through %x. All RUN lines share one expectation: a
; direct vfmul.vv at e64/m1 with no conversions.
394 define void @fmul_v2f64(ptr %x, ptr %y) {
395 ; CHECK-LABEL: fmul_v2f64:
397 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
398 ; CHECK-NEXT: vle64.v v8, (a0)
399 ; CHECK-NEXT: vle64.v v9, (a1)
400 ; CHECK-NEXT: vfmul.vv v8, v8, v9
401 ; CHECK-NEXT: vse64.v v8, (a0)
403 %a = load <2 x double>, ptr %x
404 %b = load <2 x double>, ptr %y
405 %c = fmul <2 x double> %a, %b
406 store <2 x double> %c, ptr %x
; Element-wise fdiv (%x divided by %y) of two <8 x bfloat> vectors; the
; quotient is stored back through %x. The shared checks expect both operands to
; be widened with vfwcvtbf16.f.f.v, divided at e32/m2 (the widened %x value is
; the first source operand), and the result narrowed with vfncvtbf16.f.f.w.
410 define void @fdiv_v8bf16(ptr %x, ptr %y) {
411 ; CHECK-LABEL: fdiv_v8bf16:
413 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
414 ; CHECK-NEXT: vle16.v v8, (a1)
415 ; CHECK-NEXT: vle16.v v9, (a0)
416 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
417 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
418 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
419 ; CHECK-NEXT: vfdiv.vv v8, v12, v10
420 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
421 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
422 ; CHECK-NEXT: vse16.v v10, (a0)
424 %a = load <8 x bfloat>, ptr %x
425 %b = load <8 x bfloat>, ptr %y
426 %c = fdiv <8 x bfloat> %a, %b
427 store <8 x bfloat> %c, ptr %x
; Element-wise fdiv (%x divided by %y) of two <6 x bfloat> vectors (a
; non-power-of-two element count); the quotient is stored back through %x. The
; shared checks expect an AVL of 6, widening with vfwcvtbf16.f.f.v, the divide
; at e32/m2, and narrowing with vfncvtbf16.f.f.w.
431 define void @fdiv_v6bf16(ptr %x, ptr %y) {
432 ; CHECK-LABEL: fdiv_v6bf16:
434 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
435 ; CHECK-NEXT: vle16.v v8, (a1)
436 ; CHECK-NEXT: vle16.v v9, (a0)
437 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
438 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
439 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
440 ; CHECK-NEXT: vfdiv.vv v8, v12, v10
441 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
442 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
443 ; CHECK-NEXT: vse16.v v10, (a0)
445 %a = load <6 x bfloat>, ptr %x
446 %b = load <6 x bfloat>, ptr %y
447 %c = fdiv <6 x bfloat> %a, %b
448 store <6 x bfloat> %c, ptr %x
; Element-wise fdiv (%x divided by %y) of two <8 x half> vectors; the quotient
; is stored back through %x. The +zvfh runs expect a single vfdiv.vv at e16,
; while the +zvfhmin runs expect widening with vfwcvt.f.f.v, the divide at
; e32/m2, and narrowing with vfncvt.f.f.w.
452 define void @fdiv_v8f16(ptr %x, ptr %y) {
453 ; ZVFH-LABEL: fdiv_v8f16:
455 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
456 ; ZVFH-NEXT: vle16.v v8, (a0)
457 ; ZVFH-NEXT: vle16.v v9, (a1)
458 ; ZVFH-NEXT: vfdiv.vv v8, v8, v9
459 ; ZVFH-NEXT: vse16.v v8, (a0)
462 ; ZVFHMIN-LABEL: fdiv_v8f16:
464 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
465 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
466 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
467 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
468 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
469 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
470 ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
471 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
472 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
473 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
475 %a = load <8 x half>, ptr %x
476 %b = load <8 x half>, ptr %y
477 %c = fdiv <8 x half> %a, %b
478 store <8 x half> %c, ptr %x
; Element-wise fdiv (%x divided by %y) of two <6 x half> vectors (a
; non-power-of-two element count); the quotient is stored back through %x. The
; +zvfh runs expect a single e16 vfdiv.vv with an AVL of 6; the +zvfhmin runs
; expect widening with vfwcvt.f.f.v, the divide at e32/m2, and narrowing with
; vfncvt.f.f.w.
482 define void @fdiv_v6f16(ptr %x, ptr %y) {
483 ; ZVFH-LABEL: fdiv_v6f16:
485 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
486 ; ZVFH-NEXT: vle16.v v8, (a0)
487 ; ZVFH-NEXT: vle16.v v9, (a1)
488 ; ZVFH-NEXT: vfdiv.vv v8, v8, v9
489 ; ZVFH-NEXT: vse16.v v8, (a0)
492 ; ZVFHMIN-LABEL: fdiv_v6f16:
494 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
495 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
496 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
497 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
498 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
499 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
500 ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
501 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
502 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
503 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
505 %a = load <6 x half>, ptr %x
506 %b = load <6 x half>, ptr %y
507 %c = fdiv <6 x half> %a, %b
508 store <6 x half> %c, ptr %x
; Element-wise fdiv (%x divided by %y) of two <4 x float> vectors; the quotient
; is stored back through %x. All RUN lines share one expectation: a direct
; vfdiv.vv at e32/m1 with no conversions.
512 define void @fdiv_v4f32(ptr %x, ptr %y) {
513 ; CHECK-LABEL: fdiv_v4f32:
515 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
516 ; CHECK-NEXT: vle32.v v8, (a0)
517 ; CHECK-NEXT: vle32.v v9, (a1)
518 ; CHECK-NEXT: vfdiv.vv v8, v8, v9
519 ; CHECK-NEXT: vse32.v v8, (a0)
521 %a = load <4 x float>, ptr %x
522 %b = load <4 x float>, ptr %y
523 %c = fdiv <4 x float> %a, %b
524 store <4 x float> %c, ptr %x
; Element-wise fdiv (%x divided by %y) of two <2 x double> vectors; the
; quotient is stored back through %x. All RUN lines share one expectation: a
; direct vfdiv.vv at e64/m1 with no conversions.
528 define void @fdiv_v2f64(ptr %x, ptr %y) {
529 ; CHECK-LABEL: fdiv_v2f64:
531 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
532 ; CHECK-NEXT: vle64.v v8, (a0)
533 ; CHECK-NEXT: vle64.v v9, (a1)
534 ; CHECK-NEXT: vfdiv.vv v8, v8, v9
535 ; CHECK-NEXT: vse64.v v8, (a0)
537 %a = load <2 x double>, ptr %x
538 %b = load <2 x double>, ptr %y
539 %c = fdiv <2 x double> %a, %b
540 store <2 x double> %c, ptr %x
; Negates an <8 x bfloat> vector in place through %x. The shared checks expect
; an integer sign-bit flip rather than an FP instruction: `lui a1, 8`
; materializes 0x8000 and vxor.vx toggles bit 15 of every 16-bit element.
544 define void @fneg_v8bf16(ptr %x) {
545 ; CHECK-LABEL: fneg_v8bf16:
547 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
548 ; CHECK-NEXT: vle16.v v8, (a0)
549 ; CHECK-NEXT: lui a1, 8
550 ; CHECK-NEXT: vxor.vx v8, v8, a1
551 ; CHECK-NEXT: vse16.v v8, (a0)
553 %a = load <8 x bfloat>, ptr %x
554 %b = fneg <8 x bfloat> %a
555 store <8 x bfloat> %b, ptr %x
; Negates a <6 x bfloat> vector (a non-power-of-two element count) in place
; through %x. The shared checks expect an AVL of 6 and an integer sign-bit
; flip: `lui a1, 8` materializes 0x8000 and vxor.vx toggles bit 15 of every
; 16-bit element.
559 define void @fneg_v6bf16(ptr %x) {
560 ; CHECK-LABEL: fneg_v6bf16:
562 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
563 ; CHECK-NEXT: vle16.v v8, (a0)
564 ; CHECK-NEXT: lui a1, 8
565 ; CHECK-NEXT: vxor.vx v8, v8, a1
566 ; CHECK-NEXT: vse16.v v8, (a0)
568 %a = load <6 x bfloat>, ptr %x
569 %b = fneg <6 x bfloat> %a
570 store <6 x bfloat> %b, ptr %x
; Negates an <8 x half> vector in place through %x. The +zvfh runs expect
; vfneg.v; the +zvfhmin runs expect an integer sign-bit flip instead, with
; `lui a1, 8` materializing 0x8000 and vxor.vx toggling bit 15 of each element.
574 define void @fneg_v8f16(ptr %x) {
575 ; ZVFH-LABEL: fneg_v8f16:
577 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
578 ; ZVFH-NEXT: vle16.v v8, (a0)
579 ; ZVFH-NEXT: vfneg.v v8, v8
580 ; ZVFH-NEXT: vse16.v v8, (a0)
583 ; ZVFHMIN-LABEL: fneg_v8f16:
585 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
586 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
587 ; ZVFHMIN-NEXT: lui a1, 8
588 ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
589 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
591 %a = load <8 x half>, ptr %x
592 %b = fneg <8 x half> %a
593 store <8 x half> %b, ptr %x
; Negates a <6 x half> vector (a non-power-of-two element count) in place
; through %x. The +zvfh runs expect vfneg.v with an AVL of 6; the +zvfhmin runs
; expect an integer sign-bit flip, with `lui a1, 8` materializing 0x8000 and
; vxor.vx toggling bit 15 of each element.
597 define void @fneg_v6f16(ptr %x) {
598 ; ZVFH-LABEL: fneg_v6f16:
600 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
601 ; ZVFH-NEXT: vle16.v v8, (a0)
602 ; ZVFH-NEXT: vfneg.v v8, v8
603 ; ZVFH-NEXT: vse16.v v8, (a0)
606 ; ZVFHMIN-LABEL: fneg_v6f16:
608 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
609 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
610 ; ZVFHMIN-NEXT: lui a1, 8
611 ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
612 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
614 %a = load <6 x half>, ptr %x
615 %b = fneg <6 x half> %a
616 store <6 x half> %b, ptr %x
; Negates a <4 x float> vector in place through %x. All RUN lines share one
; expectation: a single vfneg.v at e32/m1.
620 define void @fneg_v4f32(ptr %x) {
621 ; CHECK-LABEL: fneg_v4f32:
623 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
624 ; CHECK-NEXT: vle32.v v8, (a0)
625 ; CHECK-NEXT: vfneg.v v8, v8
626 ; CHECK-NEXT: vse32.v v8, (a0)
628 %a = load <4 x float>, ptr %x
629 %b = fneg <4 x float> %a
630 store <4 x float> %b, ptr %x
; Negates a <2 x double> vector in place through %x. All RUN lines share one
; expectation: a single vfneg.v at e64/m1.
634 define void @fneg_v2f64(ptr %x) {
635 ; CHECK-LABEL: fneg_v2f64:
637 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
638 ; CHECK-NEXT: vle64.v v8, (a0)
639 ; CHECK-NEXT: vfneg.v v8, v8
640 ; CHECK-NEXT: vse64.v v8, (a0)
642 %a = load <2 x double>, ptr %x
643 %b = fneg <2 x double> %a
644 store <2 x double> %b, ptr %x
; Applies llvm.fabs to an <8 x bfloat> vector in place through %x. The shared
; checks expect an integer sign-bit clear: `lui a1, 8` followed by
; `addi a1, a1, -1` builds the mask 0x7fff, which vand.vx applies to every
; 16-bit element.
648 define void @fabs_v8bf16(ptr %x) {
649 ; CHECK-LABEL: fabs_v8bf16:
651 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
652 ; CHECK-NEXT: vle16.v v8, (a0)
653 ; CHECK-NEXT: lui a1, 8
654 ; CHECK-NEXT: addi a1, a1, -1
655 ; CHECK-NEXT: vand.vx v8, v8, a1
656 ; CHECK-NEXT: vse16.v v8, (a0)
658 %a = load <8 x bfloat>, ptr %x
659 %b = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %a)
660 store <8 x bfloat> %b, ptr %x
; Applies llvm.fabs to a <6 x bfloat> vector (a non-power-of-two element count)
; in place through %x. The shared checks expect an AVL of 6 and an integer
; sign-bit clear: `lui a1, 8` followed by `addi a1, a1, -1` builds the mask
; 0x7fff, which vand.vx applies to every 16-bit element.
664 define void @fabs_v6bf16(ptr %x) {
665 ; CHECK-LABEL: fabs_v6bf16:
667 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
668 ; CHECK-NEXT: vle16.v v8, (a0)
669 ; CHECK-NEXT: lui a1, 8
670 ; CHECK-NEXT: addi a1, a1, -1
671 ; CHECK-NEXT: vand.vx v8, v8, a1
672 ; CHECK-NEXT: vse16.v v8, (a0)
674 %a = load <6 x bfloat>, ptr %x
675 %b = call <6 x bfloat> @llvm.fabs.v6bf16(<6 x bfloat> %a)
676 store <6 x bfloat> %b, ptr %x
; Applies llvm.fabs to an <8 x half> vector in place through %x. The +zvfh runs
; expect vfabs.v; the +zvfhmin runs expect an integer sign-bit clear, where
; `lui a1, 8` plus `addi a1, a1, -1` builds 0x7fff for vand.vx.
680 define void @fabs_v8f16(ptr %x) {
681 ; ZVFH-LABEL: fabs_v8f16:
683 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
684 ; ZVFH-NEXT: vle16.v v8, (a0)
685 ; ZVFH-NEXT: vfabs.v v8, v8
686 ; ZVFH-NEXT: vse16.v v8, (a0)
689 ; ZVFHMIN-LABEL: fabs_v8f16:
691 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
692 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
693 ; ZVFHMIN-NEXT: lui a1, 8
694 ; ZVFHMIN-NEXT: addi a1, a1, -1
695 ; ZVFHMIN-NEXT: vand.vx v8, v8, a1
696 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
698 %a = load <8 x half>, ptr %x
699 %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
700 store <8 x half> %b, ptr %x
; Applies llvm.fabs to a <6 x half> vector (a non-power-of-two element count)
; in place through %x. The +zvfh runs expect vfabs.v with an AVL of 6; the
; +zvfhmin runs expect an integer sign-bit clear, where `lui a1, 8` plus
; `addi a1, a1, -1` builds 0x7fff for vand.vx.
704 define void @fabs_v6f16(ptr %x) {
705 ; ZVFH-LABEL: fabs_v6f16:
707 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
708 ; ZVFH-NEXT: vle16.v v8, (a0)
709 ; ZVFH-NEXT: vfabs.v v8, v8
710 ; ZVFH-NEXT: vse16.v v8, (a0)
713 ; ZVFHMIN-LABEL: fabs_v6f16:
715 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
716 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
717 ; ZVFHMIN-NEXT: lui a1, 8
718 ; ZVFHMIN-NEXT: addi a1, a1, -1
719 ; ZVFHMIN-NEXT: vand.vx v8, v8, a1
720 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
722 %a = load <6 x half>, ptr %x
723 %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
724 store <6 x half> %b, ptr %x
; Applies llvm.fabs to a <4 x float> vector in place through %x. All RUN lines
; share one expectation: a single vfabs.v at e32/m1.
728 define void @fabs_v4f32(ptr %x) {
729 ; CHECK-LABEL: fabs_v4f32:
731 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
732 ; CHECK-NEXT: vle32.v v8, (a0)
733 ; CHECK-NEXT: vfabs.v v8, v8
734 ; CHECK-NEXT: vse32.v v8, (a0)
736 %a = load <4 x float>, ptr %x
737 %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
738 store <4 x float> %b, ptr %x
; Applies llvm.fabs to a <2 x double> vector in place through %x. All RUN lines
; share one expectation: a single vfabs.v at e64/m1.
742 define void @fabs_v2f64(ptr %x) {
743 ; CHECK-LABEL: fabs_v2f64:
745 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
746 ; CHECK-NEXT: vle64.v v8, (a0)
747 ; CHECK-NEXT: vfabs.v v8, v8
748 ; CHECK-NEXT: vse64.v v8, (a0)
750 %a = load <2 x double>, ptr %x
751 %b = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
752 store <2 x double> %b, ptr %x
; llvm.copysign on <8 x bfloat>: magnitude from the vector at %x, sign from the
; vector at %y, result stored back through %x. The shared checks expect integer
; bit manipulation: the %y elements are masked with 0x8000 (`lui a1, 8`), the
; %x elements with 0x7fff (`addi a1, a1, -1`), and the two are merged by vor.vv.
756 define void @copysign_v8bf16(ptr %x, ptr %y) {
757 ; CHECK-LABEL: copysign_v8bf16:
759 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
760 ; CHECK-NEXT: vle16.v v8, (a1)
761 ; CHECK-NEXT: vle16.v v9, (a0)
762 ; CHECK-NEXT: lui a1, 8
763 ; CHECK-NEXT: vand.vx v8, v8, a1
764 ; CHECK-NEXT: addi a1, a1, -1
765 ; CHECK-NEXT: vand.vx v9, v9, a1
766 ; CHECK-NEXT: vor.vv v8, v9, v8
767 ; CHECK-NEXT: vse16.v v8, (a0)
769 %a = load <8 x bfloat>, ptr %x
770 %b = load <8 x bfloat>, ptr %y
771 %c = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
772 store <8 x bfloat> %c, ptr %x
; llvm.copysign on <6 x bfloat> (a non-power-of-two element count): magnitude
; from the vector at %x, sign from the vector at %y, result stored back through
; %x. The shared checks expect an AVL of 6 and integer bit manipulation: %y
; masked with 0x8000, %x masked with 0x7fff, merged by vor.vv.
776 define void @copysign_v6bf16(ptr %x, ptr %y) {
777 ; CHECK-LABEL: copysign_v6bf16:
779 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
780 ; CHECK-NEXT: vle16.v v8, (a1)
781 ; CHECK-NEXT: vle16.v v9, (a0)
782 ; CHECK-NEXT: lui a1, 8
783 ; CHECK-NEXT: vand.vx v8, v8, a1
784 ; CHECK-NEXT: addi a1, a1, -1
785 ; CHECK-NEXT: vand.vx v9, v9, a1
786 ; CHECK-NEXT: vor.vv v8, v9, v8
787 ; CHECK-NEXT: vse16.v v8, (a0)
789 %a = load <6 x bfloat>, ptr %x
790 %b = load <6 x bfloat>, ptr %y
791 %c = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b)
792 store <6 x bfloat> %c, ptr %x
; llvm.copysign on <8 x half>: magnitude from the vector at %x, sign from the
; vector at %y, result stored back through %x. The +zvfh runs expect a single
; vfsgnj.vv; the +zvfhmin runs expect integer bit manipulation, with %y masked
; by 0x8000, %x masked by 0x7fff, and the halves merged by vor.vv.
796 define void @copysign_v8f16(ptr %x, ptr %y) {
797 ; ZVFH-LABEL: copysign_v8f16:
799 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
800 ; ZVFH-NEXT: vle16.v v8, (a0)
801 ; ZVFH-NEXT: vle16.v v9, (a1)
802 ; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
803 ; ZVFH-NEXT: vse16.v v8, (a0)
806 ; ZVFHMIN-LABEL: copysign_v8f16:
808 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
809 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
810 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
811 ; ZVFHMIN-NEXT: lui a1, 8
812 ; ZVFHMIN-NEXT: vand.vx v8, v8, a1
813 ; ZVFHMIN-NEXT: addi a1, a1, -1
814 ; ZVFHMIN-NEXT: vand.vx v9, v9, a1
815 ; ZVFHMIN-NEXT: vor.vv v8, v9, v8
816 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
818 %a = load <8 x half>, ptr %x
819 %b = load <8 x half>, ptr %y
820 %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
821 store <8 x half> %c, ptr %x
; llvm.copysign on <6 x half> (a non-power-of-two element count): magnitude
; from the vector at %x, sign from the vector at %y, result stored back through
; %x. The +zvfh runs expect a single vfsgnj.vv with an AVL of 6; the +zvfhmin
; runs expect %y masked by 0x8000, %x masked by 0x7fff, merged by vor.vv.
825 define void @copysign_v6f16(ptr %x, ptr %y) {
826 ; ZVFH-LABEL: copysign_v6f16:
828 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
829 ; ZVFH-NEXT: vle16.v v8, (a0)
830 ; ZVFH-NEXT: vle16.v v9, (a1)
831 ; ZVFH-NEXT: vfsgnj.vv v8, v8, v9
832 ; ZVFH-NEXT: vse16.v v8, (a0)
835 ; ZVFHMIN-LABEL: copysign_v6f16:
837 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
838 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
839 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
840 ; ZVFHMIN-NEXT: lui a1, 8
841 ; ZVFHMIN-NEXT: vand.vx v8, v8, a1
842 ; ZVFHMIN-NEXT: addi a1, a1, -1
843 ; ZVFHMIN-NEXT: vand.vx v9, v9, a1
844 ; ZVFHMIN-NEXT: vor.vv v8, v9, v8
845 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
847 %a = load <6 x half>, ptr %x
848 %b = load <6 x half>, ptr %y
849 %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
850 store <6 x half> %c, ptr %x
; llvm.copysign on <4 x float>: magnitude from the vector at %x, sign from the
; vector at %y, result stored back through %x. All RUN lines share one
; expectation: a single vfsgnj.vv at e32/m1.
854 define void @copysign_v4f32(ptr %x, ptr %y) {
855 ; CHECK-LABEL: copysign_v4f32:
857 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
858 ; CHECK-NEXT: vle32.v v8, (a0)
859 ; CHECK-NEXT: vle32.v v9, (a1)
860 ; CHECK-NEXT: vfsgnj.vv v8, v8, v9
861 ; CHECK-NEXT: vse32.v v8, (a0)
863 %a = load <4 x float>, ptr %x
864 %b = load <4 x float>, ptr %y
865 %c = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
866 store <4 x float> %c, ptr %x
; llvm.copysign on <2 x double>: magnitude from the vector at %x, sign from the
; vector at %y, result stored back through %x. All RUN lines share one
; expectation: a single vfsgnj.vv at e64/m1.
870 define void @copysign_v2f64(ptr %x, ptr %y) {
871 ; CHECK-LABEL: copysign_v2f64:
873 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
874 ; CHECK-NEXT: vle64.v v8, (a0)
875 ; CHECK-NEXT: vle64.v v9, (a1)
876 ; CHECK-NEXT: vfsgnj.vv v8, v8, v9
877 ; CHECK-NEXT: vse64.v v8, (a0)
879 %a = load <2 x double>, ptr %x
880 %b = load <2 x double>, ptr %y
881 %c = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
882 store <2 x double> %c, ptr %x
; llvm.copysign on <8 x bfloat> where the sign operand is the scalar %y
; splatted to all lanes (insertelement into lane 0, then shufflevector with a
; zeroinitializer mask). The shared checks expect the scalar to be moved to a
; GPR with fmv.x.w and broadcast with vmv.v.x, then the vector masked with
; 0x7fff, the splat masked with 0x8000, and the two merged by vor.vv.
886 define void @copysign_vf_v8bf16(ptr %x, bfloat %y) {
887 ; CHECK-LABEL: copysign_vf_v8bf16:
889 ; CHECK-NEXT: fmv.x.w a1, fa0
890 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
891 ; CHECK-NEXT: vle16.v v8, (a0)
892 ; CHECK-NEXT: lui a2, 8
893 ; CHECK-NEXT: vmv.v.x v9, a1
894 ; CHECK-NEXT: addi a1, a2, -1
895 ; CHECK-NEXT: vand.vx v8, v8, a1
896 ; CHECK-NEXT: vand.vx v9, v9, a2
897 ; CHECK-NEXT: vor.vv v8, v8, v9
898 ; CHECK-NEXT: vse16.v v8, (a0)
900 %a = load <8 x bfloat>, ptr %x
901 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
902 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
903 %d = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %c)
904 store <8 x bfloat> %d, ptr %x
; llvm.copysign on <6 x bfloat> (a non-power-of-two element count) where the
; sign operand is the scalar %y splatted to all lanes. The shared checks expect
; an AVL of 6, the scalar moved with fmv.x.w and broadcast with vmv.v.x, the
; vector masked with 0x7fff, the splat masked with 0x8000, merged by vor.vv.
908 define void @copysign_vf_v6bf16(ptr %x, bfloat %y) {
909 ; CHECK-LABEL: copysign_vf_v6bf16:
911 ; CHECK-NEXT: fmv.x.w a1, fa0
912 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
913 ; CHECK-NEXT: vle16.v v8, (a0)
914 ; CHECK-NEXT: lui a2, 8
915 ; CHECK-NEXT: vmv.v.x v9, a1
916 ; CHECK-NEXT: addi a1, a2, -1
917 ; CHECK-NEXT: vand.vx v8, v8, a1
918 ; CHECK-NEXT: vand.vx v9, v9, a2
919 ; CHECK-NEXT: vor.vv v8, v8, v9
920 ; CHECK-NEXT: vse16.v v8, (a0)
922 %a = load <6 x bfloat>, ptr %x
923 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
924 %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
925 %d = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %c)
926 store <6 x bfloat> %d, ptr %x
; llvm.copysign on <8 x half> where the sign operand is the scalar %y splatted
; to all lanes. The +zvfh runs expect the splat to fold into vfsgnj.vf with
; fa0; the +zvfhmin runs expect fmv.x.w + vmv.v.x for the splat, then the
; vector masked with 0x7fff, the splat masked with 0x8000, merged by vor.vv.
930 define void @copysign_vf_v8f16(ptr %x, half %y) {
931 ; ZVFH-LABEL: copysign_vf_v8f16:
933 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
934 ; ZVFH-NEXT: vle16.v v8, (a0)
935 ; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0
936 ; ZVFH-NEXT: vse16.v v8, (a0)
939 ; ZVFHMIN-LABEL: copysign_vf_v8f16:
941 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
942 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
943 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
944 ; ZVFHMIN-NEXT: lui a2, 8
945 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
946 ; ZVFHMIN-NEXT: addi a1, a2, -1
947 ; ZVFHMIN-NEXT: vand.vx v8, v8, a1
948 ; ZVFHMIN-NEXT: vand.vx v9, v9, a2
949 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9
950 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
952 %a = load <8 x half>, ptr %x
953 %b = insertelement <8 x half> poison, half %y, i32 0
954 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
955 %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
956 store <8 x half> %d, ptr %x
; llvm.copysign on <6 x half> (a non-power-of-two element count) where the sign
; operand is the scalar %y splatted to all lanes. The +zvfh runs expect
; vfsgnj.vf with fa0 and an AVL of 6; the +zvfhmin runs expect fmv.x.w +
; vmv.v.x for the splat, then the 0x7fff / 0x8000 masks merged by vor.vv.
960 define void @copysign_vf_v6f16(ptr %x, half %y) {
961 ; ZVFH-LABEL: copysign_vf_v6f16:
963 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
964 ; ZVFH-NEXT: vle16.v v8, (a0)
965 ; ZVFH-NEXT: vfsgnj.vf v8, v8, fa0
966 ; ZVFH-NEXT: vse16.v v8, (a0)
969 ; ZVFHMIN-LABEL: copysign_vf_v6f16:
971 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
972 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
973 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
974 ; ZVFHMIN-NEXT: lui a2, 8
975 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
976 ; ZVFHMIN-NEXT: addi a1, a2, -1
977 ; ZVFHMIN-NEXT: vand.vx v8, v8, a1
978 ; ZVFHMIN-NEXT: vand.vx v9, v9, a2
979 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9
980 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
982 %a = load <6 x half>, ptr %x
983 %b = insertelement <6 x half> poison, half %y, i32 0
984 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
985 %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c)
986 store <6 x half> %d, ptr %x
; llvm.copysign on <4 x float> where the sign operand is the scalar %y splatted
; to all lanes. All RUN lines share one expectation: the splat folds into a
; single vfsgnj.vf taking fa0 at e32/m1.
990 define void @copysign_vf_v4f32(ptr %x, float %y) {
991 ; CHECK-LABEL: copysign_vf_v4f32:
993 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
994 ; CHECK-NEXT: vle32.v v8, (a0)
995 ; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
996 ; CHECK-NEXT: vse32.v v8, (a0)
998 %a = load <4 x float>, ptr %x
999 %b = insertelement <4 x float> poison, float %y, i32 0
1000 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
1001 %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
1002 store <4 x float> %d, ptr %x
; llvm.copysign on <2 x double> where the sign operand is the scalar %y
; splatted to all lanes. All RUN lines share one expectation: the splat folds
; into a single vfsgnj.vf taking fa0 at e64/m1.
1006 define void @copysign_vf_v2f64(ptr %x, double %y) {
1007 ; CHECK-LABEL: copysign_vf_v2f64:
1009 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1010 ; CHECK-NEXT: vle64.v v8, (a0)
1011 ; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
1012 ; CHECK-NEXT: vse64.v v8, (a0)
1014 %a = load <2 x double>, ptr %x
1015 %b = insertelement <2 x double> poison, double %y, i32 0
1016 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
1017 %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
1018 store <2 x double> %d, ptr %x
; llvm.copysign on <8 x bfloat> where the sign operand is fneg of the vector at
; %y; the magnitude comes from %x and the result is stored back through %x. The
; shared checks expect integer bit manipulation: the %y elements are XORed with
; 0x8000 (the negate) and then masked with 0x8000, the %x elements are masked
; with 0x7fff, and the two are merged by vor.vv.
1022 define void @copysign_neg_v8bf16(ptr %x, ptr %y) {
1023 ; CHECK-LABEL: copysign_neg_v8bf16:
1025 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1026 ; CHECK-NEXT: vle16.v v8, (a1)
1027 ; CHECK-NEXT: vle16.v v9, (a0)
1028 ; CHECK-NEXT: lui a1, 8
1029 ; CHECK-NEXT: addi a2, a1, -1
1030 ; CHECK-NEXT: vxor.vx v8, v8, a1
1031 ; CHECK-NEXT: vand.vx v9, v9, a2
1032 ; CHECK-NEXT: vand.vx v8, v8, a1
1033 ; CHECK-NEXT: vor.vv v8, v9, v8
1034 ; CHECK-NEXT: vse16.v v8, (a0)
1036 %a = load <8 x bfloat>, ptr %x
1037 %b = load <8 x bfloat>, ptr %y
1038 %c = fneg <8 x bfloat> %b
1039 %d = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %c)
1040 store <8 x bfloat> %d, ptr %x
; llvm.copysign on <6 x bfloat> (a non-power-of-two element count) where the
; sign operand is fneg of the vector at %y; magnitude comes from %x and the
; result is stored back through %x. The shared checks expect an AVL of 6, the
; %y elements XORed and then masked with 0x8000, the %x elements masked with
; 0x7fff, and the two merged by vor.vv.
1044 define void @copysign_neg_v6bf16(ptr %x, ptr %y) {
1045 ; CHECK-LABEL: copysign_neg_v6bf16:
1047 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1048 ; CHECK-NEXT: vle16.v v8, (a1)
1049 ; CHECK-NEXT: vle16.v v9, (a0)
1050 ; CHECK-NEXT: lui a1, 8
1051 ; CHECK-NEXT: addi a2, a1, -1
1052 ; CHECK-NEXT: vxor.vx v8, v8, a1
1053 ; CHECK-NEXT: vand.vx v9, v9, a2
1054 ; CHECK-NEXT: vand.vx v8, v8, a1
1055 ; CHECK-NEXT: vor.vv v8, v9, v8
1056 ; CHECK-NEXT: vse16.v v8, (a0)
1058 %a = load <6 x bfloat>, ptr %x
1059 %b = load <6 x bfloat>, ptr %y
1060 %c = fneg <6 x bfloat> %b
1061 %d = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %c)
1062 store <6 x bfloat> %d, ptr %x
; llvm.copysign on <8 x half> where the sign operand is fneg of the vector at
; %y; magnitude comes from %x and the result is stored back through %x. The
; +zvfh runs expect the negate to fold into a single vfsgnjn.vv; the +zvfhmin
; runs expect the %y elements XORed and then masked with 0x8000, the %x
; elements masked with 0x7fff, and the two merged by vor.vv.
1066 define void @copysign_neg_v8f16(ptr %x, ptr %y) {
1067 ; ZVFH-LABEL: copysign_neg_v8f16:
1069 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1070 ; ZVFH-NEXT: vle16.v v8, (a0)
1071 ; ZVFH-NEXT: vle16.v v9, (a1)
1072 ; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9
1073 ; ZVFH-NEXT: vse16.v v8, (a0)
1076 ; ZVFHMIN-LABEL: copysign_neg_v8f16:
1078 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1079 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
1080 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1081 ; ZVFHMIN-NEXT: lui a1, 8
1082 ; ZVFHMIN-NEXT: addi a2, a1, -1
1083 ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
1084 ; ZVFHMIN-NEXT: vand.vx v9, v9, a2
1085 ; ZVFHMIN-NEXT: vand.vx v8, v8, a1
1086 ; ZVFHMIN-NEXT: vor.vv v8, v9, v8
1087 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
1089 %a = load <8 x half>, ptr %x
1090 %b = load <8 x half>, ptr %y
1091 %c = fneg <8 x half> %b
1092 %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
1093 store <8 x half> %d, ptr %x
; llvm.copysign on <6 x half> (a non-power-of-two element count) where the sign
; operand is fneg of the vector at %y; magnitude comes from %x and the result
; is stored back through %x. The +zvfh runs expect a single vfsgnjn.vv with an
; AVL of 6; the +zvfhmin runs expect the %y elements XORed and then masked with
; 0x8000, the %x elements masked with 0x7fff, and the two merged by vor.vv.
1097 define void @copysign_neg_v6f16(ptr %x, ptr %y) {
1098 ; ZVFH-LABEL: copysign_neg_v6f16:
1100 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1101 ; ZVFH-NEXT: vle16.v v8, (a0)
1102 ; ZVFH-NEXT: vle16.v v9, (a1)
1103 ; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9
1104 ; ZVFH-NEXT: vse16.v v8, (a0)
1107 ; ZVFHMIN-LABEL: copysign_neg_v6f16:
1109 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1110 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
1111 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1112 ; ZVFHMIN-NEXT: lui a1, 8
1113 ; ZVFHMIN-NEXT: addi a2, a1, -1
1114 ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
1115 ; ZVFHMIN-NEXT: vand.vx v9, v9, a2
1116 ; ZVFHMIN-NEXT: vand.vx v8, v8, a1
1117 ; ZVFHMIN-NEXT: vor.vv v8, v9, v8
1118 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
1120 %a = load <6 x half>, ptr %x
1121 %b = load <6 x half>, ptr %y
1122 %c = fneg <6 x half> %b
1123 %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c)
1124 store <6 x half> %d, ptr %x
; copysign(%a, -%b) on <4 x float>; the result is stored back through %x.
; On every run configuration the fneg/copysign pair is expected to become a
; single vfsgnjn.vv at e32.
1128 define void @copysign_neg_v4f32(ptr %x, ptr %y) {
1129 ; CHECK-LABEL: copysign_neg_v4f32:
1131 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1132 ; CHECK-NEXT: vle32.v v8, (a0)
1133 ; CHECK-NEXT: vle32.v v9, (a1)
1134 ; CHECK-NEXT: vfsgnjn.vv v8, v8, v9
1135 ; CHECK-NEXT: vse32.v v8, (a0)
1137 %a = load <4 x float>, ptr %x
1138 %b = load <4 x float>, ptr %y
1139 %c = fneg <4 x float> %b
1140 %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c)
1141 store <4 x float> %d, ptr %x
; copysign(%a, -%b) on <2 x double>; the result is stored back through %x.
; On every run configuration the fneg/copysign pair is expected to become a
; single vfsgnjn.vv at e64.
1145 define void @copysign_neg_v2f64(ptr %x, ptr %y) {
1146 ; CHECK-LABEL: copysign_neg_v2f64:
1148 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1149 ; CHECK-NEXT: vle64.v v8, (a0)
1150 ; CHECK-NEXT: vle64.v v9, (a1)
1151 ; CHECK-NEXT: vfsgnjn.vv v8, v8, v9
1152 ; CHECK-NEXT: vse64.v v8, (a0)
1154 %a = load <2 x double>, ptr %x
1155 %b = load <2 x double>, ptr %y
1156 %c = fneg <2 x double> %b
1157 %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c)
1158 store <2 x double> %d, ptr %x
; copysign on <4 x bfloat> %a where the sign operand is -%b (<4 x float>)
; truncated to bfloat; the result is stored back through %x.
; Expected code: %b is narrowed with vfncvtbf16.f.f.w, then the integer
; sign-bit sequence is applied (vxor.vx/vand.vx with 0x8000 on the narrowed
; value, vand.vx with 0x7fff on %a, vor.vv to combine).
1162 define void @copysign_neg_trunc_v4bf16_v4f32(ptr %x, ptr %y) {
1163 ; CHECK-LABEL: copysign_neg_trunc_v4bf16_v4f32:
1165 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1166 ; CHECK-NEXT: vle16.v v8, (a0)
1167 ; CHECK-NEXT: vle32.v v9, (a1)
1168 ; CHECK-NEXT: lui a1, 8
1169 ; CHECK-NEXT: addi a2, a1, -1
1170 ; CHECK-NEXT: vand.vx v8, v8, a2
1171 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v9
1172 ; CHECK-NEXT: vxor.vx v9, v10, a1
1173 ; CHECK-NEXT: vand.vx v9, v9, a1
1174 ; CHECK-NEXT: vor.vv v8, v8, v9
1175 ; CHECK-NEXT: vse16.v v8, (a0)
1177 %a = load <4 x bfloat>, ptr %x
1178 %b = load <4 x float>, ptr %y
1179 %c = fneg <4 x float> %b
1180 %d = fptrunc <4 x float> %c to <4 x bfloat>
1181 %e = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %d)
1182 store <4 x bfloat> %e, ptr %x
; Non-power-of-two variant: copysign on <3 x bfloat> %a where the sign operand
; is -%b (<3 x float>) truncated to bfloat; the result is stored to %x.
; Expected code matches the 4-element form but runs at VL=3: narrow with
; vfncvtbf16.f.f.w, then the integer sign-bit sequence (0x8000 / 0x7fff masks).
1186 define void @copysign_neg_trunc_v3bf16_v3f32(ptr %x, ptr %y) {
1187 ; CHECK-LABEL: copysign_neg_trunc_v3bf16_v3f32:
1189 ; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
1190 ; CHECK-NEXT: vle16.v v8, (a0)
1191 ; CHECK-NEXT: vle32.v v9, (a1)
1192 ; CHECK-NEXT: lui a1, 8
1193 ; CHECK-NEXT: addi a2, a1, -1
1194 ; CHECK-NEXT: vand.vx v8, v8, a2
1195 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v9
1196 ; CHECK-NEXT: vxor.vx v9, v10, a1
1197 ; CHECK-NEXT: vand.vx v9, v9, a1
1198 ; CHECK-NEXT: vor.vv v8, v8, v9
1199 ; CHECK-NEXT: vse16.v v8, (a0)
1201 %a = load <3 x bfloat>, ptr %x
1202 %b = load <3 x float>, ptr %y
1203 %c = fneg <3 x float> %b
1204 %d = fptrunc <3 x float> %c to <3 x bfloat>
1205 %e = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> %a, <3 x bfloat> %d)
1206 store <3 x bfloat> %e, ptr %x
; copysign on <4 x half> %a where the sign operand is -%b (<4 x float>)
; truncated to half; the result is stored back through %x.
; With +zvfh the expected code is vfncvt.f.f.w followed by one vfsgnjn.vv.
; With +zvfhmin the expected code is vfncvt.f.f.w followed by the integer
; sign-bit sequence (vxor.vx/vand.vx with 0x8000, vand.vx with 0x7fff, vor.vv).
1210 define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
1211 ; ZVFH-LABEL: copysign_neg_trunc_v4f16_v4f32:
1213 ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1214 ; ZVFH-NEXT: vle32.v v8, (a1)
1215 ; ZVFH-NEXT: vle16.v v9, (a0)
1216 ; ZVFH-NEXT: vfncvt.f.f.w v10, v8
1217 ; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10
1218 ; ZVFH-NEXT: vse16.v v8, (a0)
1221 ; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32:
1223 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
1224 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
1225 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
1226 ; ZVFHMIN-NEXT: lui a1, 8
1227 ; ZVFHMIN-NEXT: addi a2, a1, -1
1228 ; ZVFHMIN-NEXT: vand.vx v8, v8, a2
1229 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
1230 ; ZVFHMIN-NEXT: vxor.vx v9, v10, a1
1231 ; ZVFHMIN-NEXT: vand.vx v9, v9, a1
1232 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9
1233 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
1235 %a = load <4 x half>, ptr %x
1236 %b = load <4 x float>, ptr %y
1237 %c = fneg <4 x float> %b
1238 %d = fptrunc <4 x float> %c to <4 x half>
1239 %e = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %d)
1240 store <4 x half> %e, ptr %x
; Non-power-of-two variant: copysign on <3 x half> %a where the sign operand
; is -%b (<3 x float>) truncated to half; the result is stored to %x.
; With +zvfh the expected code is vfncvt.f.f.w followed by vfsgnjn.vv at VL=3.
; With +zvfhmin the expected code is vfncvt.f.f.w followed by the integer
; sign-bit sequence (0x8000 / 0x7fff masks), also at VL=3.
1244 define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
1245 ; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32:
1247 ; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
1248 ; ZVFH-NEXT: vle32.v v8, (a1)
1249 ; ZVFH-NEXT: vle16.v v9, (a0)
1250 ; ZVFH-NEXT: vfncvt.f.f.w v10, v8
1251 ; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10
1252 ; ZVFH-NEXT: vse16.v v8, (a0)
1255 ; ZVFHMIN-LABEL: copysign_neg_trunc_v3f16_v3f32:
1257 ; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
1258 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
1259 ; ZVFHMIN-NEXT: vle32.v v9, (a1)
1260 ; ZVFHMIN-NEXT: lui a1, 8
1261 ; ZVFHMIN-NEXT: addi a2, a1, -1
1262 ; ZVFHMIN-NEXT: vand.vx v8, v8, a2
1263 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9
1264 ; ZVFHMIN-NEXT: vxor.vx v9, v10, a1
1265 ; ZVFHMIN-NEXT: vand.vx v9, v9, a1
1266 ; ZVFHMIN-NEXT: vor.vv v8, v8, v9
1267 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
1269 %a = load <3 x half>, ptr %x
1270 %b = load <3 x float>, ptr %y
1271 %c = fneg <3 x float> %b
1272 %d = fptrunc <3 x float> %c to <3 x half>
1273 %e = call <3 x half> @llvm.copysign.v3f16(<3 x half> %a, <3 x half> %d)
1274 store <3 x half> %e, ptr %x
; copysign on <2 x double> %a where the sign operand is -%b (<2 x float>)
; extended to double; the result is stored back through %x.
; Expected code: %b is widened with vfwcvt.f.f.v at e32, then a single
; vfsgnjn.vv at e64 performs the negated sign injection.
1278 define void @copysign_neg_ext_v2f64_v2f32(ptr %x, ptr %y) {
1279 ; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32:
1281 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
1282 ; CHECK-NEXT: vle32.v v8, (a1)
1283 ; CHECK-NEXT: vle64.v v9, (a0)
1284 ; CHECK-NEXT: vfwcvt.f.f.v v10, v8
1285 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
1286 ; CHECK-NEXT: vfsgnjn.vv v8, v9, v10
1287 ; CHECK-NEXT: vse64.v v8, (a0)
1289 %a = load <2 x double>, ptr %x
1290 %b = load <2 x float>, ptr %y
1291 %c = fneg <2 x float> %b
1292 %d = fpext <2 x float> %c to <2 x double>
1293 %e = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %d)
1294 store <2 x double> %e, ptr %x
; llvm.sqrt on <8 x bfloat>, loaded from and stored back to %x.
; Expected code: the input is widened to e32 with vfwcvtbf16.f.f.v, vfsqrt.v
; runs at e32/m2, and the result is narrowed back with vfncvtbf16.f.f.w.
1298 define void @sqrt_v8bf16(ptr %x) {
1299 ; CHECK-LABEL: sqrt_v8bf16:
1301 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1302 ; CHECK-NEXT: vle16.v v8, (a0)
1303 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
1304 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1305 ; CHECK-NEXT: vfsqrt.v v8, v10
1306 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1307 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
1308 ; CHECK-NEXT: vse16.v v10, (a0)
1310 %a = load <8 x bfloat>, ptr %x
1311 %b = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %a)
1312 store <8 x bfloat> %b, ptr %x
; llvm.sqrt on the non-power-of-two <6 x bfloat>, loaded from and stored to %x.
; Expected code: the load uses VL=6, the widen (vfwcvtbf16.f.f.v) and the e32
; vfsqrt.v run at VL=8, and the narrow (vfncvtbf16.f.f.w) plus store switch
; back to VL=6.
1316 define void @sqrt_v6bf16(ptr %x) {
1317 ; CHECK-LABEL: sqrt_v6bf16:
1319 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1320 ; CHECK-NEXT: vle16.v v8, (a0)
1321 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1322 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
1323 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1324 ; CHECK-NEXT: vfsqrt.v v8, v10
1325 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1326 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
1327 ; CHECK-NEXT: vse16.v v10, (a0)
1329 %a = load <6 x bfloat>, ptr %x
1330 %b = call <6 x bfloat> @llvm.sqrt.v6bf16(<6 x bfloat> %a)
1331 store <6 x bfloat> %b, ptr %x
; llvm.sqrt on <8 x half>, loaded from and stored back to %x.
; With +zvfh the expected code is a direct vfsqrt.v at e16.
; With +zvfhmin the input is widened (vfwcvt.f.f.v), vfsqrt.v runs at e32/m2,
; and the result is narrowed back (vfncvt.f.f.w).
1335 define void @sqrt_v8f16(ptr %x) {
1336 ; ZVFH-LABEL: sqrt_v8f16:
1338 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1339 ; ZVFH-NEXT: vle16.v v8, (a0)
1340 ; ZVFH-NEXT: vfsqrt.v v8, v8
1341 ; ZVFH-NEXT: vse16.v v8, (a0)
1344 ; ZVFHMIN-LABEL: sqrt_v8f16:
1346 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1347 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
1348 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
1349 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1350 ; ZVFHMIN-NEXT: vfsqrt.v v8, v10
1351 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1352 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
1353 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
1355 %a = load <8 x half>, ptr %x
1356 %b = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a)
1357 store <8 x half> %b, ptr %x
; llvm.sqrt on the non-power-of-two <6 x half>, loaded from and stored to %x.
; With +zvfh the expected code is a direct vfsqrt.v at VL=6.
; With +zvfhmin the load uses VL=6, the widen (vfwcvt.f.f.v) and e32 vfsqrt.v
; run at VL=8, and the narrow (vfncvt.f.f.w) plus store switch back to VL=6.
1361 define void @sqrt_v6f16(ptr %x) {
1362 ; ZVFH-LABEL: sqrt_v6f16:
1364 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1365 ; ZVFH-NEXT: vle16.v v8, (a0)
1366 ; ZVFH-NEXT: vfsqrt.v v8, v8
1367 ; ZVFH-NEXT: vse16.v v8, (a0)
1370 ; ZVFHMIN-LABEL: sqrt_v6f16:
1372 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1373 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
1374 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1375 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
1376 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1377 ; ZVFHMIN-NEXT: vfsqrt.v v8, v10
1378 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1379 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
1380 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
1382 %a = load <6 x half>, ptr %x
1383 %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a)
1384 store <6 x half> %b, ptr %x
; llvm.sqrt on <4 x float>, loaded from and stored back to %x.
; Expected code on every run configuration: a direct vfsqrt.v at e32.
1388 define void @sqrt_v4f32(ptr %x) {
1389 ; CHECK-LABEL: sqrt_v4f32:
1391 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1392 ; CHECK-NEXT: vle32.v v8, (a0)
1393 ; CHECK-NEXT: vfsqrt.v v8, v8
1394 ; CHECK-NEXT: vse32.v v8, (a0)
1396 %a = load <4 x float>, ptr %x
1397 %b = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
1398 store <4 x float> %b, ptr %x
; llvm.sqrt on <2 x double>, loaded from and stored back to %x.
; Expected code on every run configuration: a direct vfsqrt.v at e64.
1402 define void @sqrt_v2f64(ptr %x) {
1403 ; CHECK-LABEL: sqrt_v2f64:
1405 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1406 ; CHECK-NEXT: vle64.v v8, (a0)
1407 ; CHECK-NEXT: vfsqrt.v v8, v8
1408 ; CHECK-NEXT: vse64.v v8, (a0)
1410 %a = load <2 x double>, ptr %x
1411 %b = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
1412 store <2 x double> %b, ptr %x
; llvm.fma(%a, %b, %c) on <8 x bfloat>; the result is stored back through %x.
; Expected code: all three operands are widened with vfwcvtbf16.f.f.v, a
; single vfmadd.vv runs at e32/m2, and the result is narrowed back with
; vfncvtbf16.f.f.w.
1416 define void @fma_v8bf16(ptr %x, ptr %y, ptr %z) {
1417 ; CHECK-LABEL: fma_v8bf16:
1419 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1420 ; CHECK-NEXT: vle16.v v8, (a2)
1421 ; CHECK-NEXT: vle16.v v9, (a0)
1422 ; CHECK-NEXT: vle16.v v10, (a1)
1423 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
1424 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
1425 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
1426 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1427 ; CHECK-NEXT: vfmadd.vv v8, v14, v12
1428 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1429 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
1430 ; CHECK-NEXT: vse16.v v10, (a0)
1432 %a = load <8 x bfloat>, ptr %x
1433 %b = load <8 x bfloat>, ptr %y
1434 %c = load <8 x bfloat>, ptr %z
1435 %d = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c)
1436 store <8 x bfloat> %d, ptr %x
; llvm.fma(%a, %b, %c) on the non-power-of-two <6 x bfloat>; result stored to
; %x. Expected code: the loads use VL=6, the three vfwcvtbf16.f.f.v widens and
; the e32 vfmadd.vv run at VL=8, and the narrow (vfncvtbf16.f.f.w) plus store
; switch back to VL=6.
1440 define void @fma_v6bf16(ptr %x, ptr %y, ptr %z) {
1441 ; CHECK-LABEL: fma_v6bf16:
1443 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1444 ; CHECK-NEXT: vle16.v v8, (a2)
1445 ; CHECK-NEXT: vle16.v v9, (a0)
1446 ; CHECK-NEXT: vle16.v v10, (a1)
1447 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1448 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
1449 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
1450 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
1451 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1452 ; CHECK-NEXT: vfmadd.vv v8, v14, v12
1453 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1454 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
1455 ; CHECK-NEXT: vse16.v v10, (a0)
1457 %a = load <6 x bfloat>, ptr %x
1458 %b = load <6 x bfloat>, ptr %y
1459 %c = load <6 x bfloat>, ptr %z
1460 %d = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %c)
1461 store <6 x bfloat> %d, ptr %x
; llvm.fma(%a, %b, %c) on <8 x half>; the result is stored back through %x.
; With +zvfh the expected code is a single vfmacc.vv at e16.
; With +zvfhmin all operands are widened (vfwcvt.f.f.v), vfmadd.vv runs at
; e32/m2, and the result is narrowed back (vfncvt.f.f.w).
1465 define void @fma_v8f16(ptr %x, ptr %y, ptr %z) {
1466 ; ZVFH-LABEL: fma_v8f16:
1468 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1469 ; ZVFH-NEXT: vle16.v v8, (a0)
1470 ; ZVFH-NEXT: vle16.v v9, (a1)
1471 ; ZVFH-NEXT: vle16.v v10, (a2)
1472 ; ZVFH-NEXT: vfmacc.vv v10, v8, v9
1473 ; ZVFH-NEXT: vse16.v v10, (a0)
1476 ; ZVFHMIN-LABEL: fma_v8f16:
1478 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1479 ; ZVFHMIN-NEXT: vle16.v v8, (a2)
1480 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1481 ; ZVFHMIN-NEXT: vle16.v v10, (a1)
1482 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
1483 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
1484 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
1485 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1486 ; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
1487 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1488 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
1489 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
1491 %a = load <8 x half>, ptr %x
1492 %b = load <8 x half>, ptr %y
1493 %c = load <8 x half>, ptr %z
1494 %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
1495 store <8 x half> %d, ptr %x
; llvm.fma(%a, %b, %c) on the non-power-of-two <6 x half>; result stored to %x.
; With +zvfh the expected code is a single vfmacc.vv at VL=6.
; With +zvfhmin the loads use VL=6, the widens (vfwcvt.f.f.v) and the e32
; vfmadd.vv run at VL=8, and the narrow (vfncvt.f.f.w) plus store switch back
; to VL=6.
1499 define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
1500 ; ZVFH-LABEL: fma_v6f16:
1502 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1503 ; ZVFH-NEXT: vle16.v v8, (a0)
1504 ; ZVFH-NEXT: vle16.v v9, (a1)
1505 ; ZVFH-NEXT: vle16.v v10, (a2)
1506 ; ZVFH-NEXT: vfmacc.vv v10, v8, v9
1507 ; ZVFH-NEXT: vse16.v v10, (a0)
1510 ; ZVFHMIN-LABEL: fma_v6f16:
1512 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1513 ; ZVFHMIN-NEXT: vle16.v v8, (a2)
1514 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1515 ; ZVFHMIN-NEXT: vle16.v v10, (a1)
1516 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1517 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
1518 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
1519 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
1520 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1521 ; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
1522 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1523 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
1524 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
1526 %a = load <6 x half>, ptr %x
1527 %b = load <6 x half>, ptr %y
1528 %c = load <6 x half>, ptr %z
1529 %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
1530 store <6 x half> %d, ptr %x
; llvm.fma(%a, %b, %c) on <4 x float>; the result is stored back through %x.
; Expected code on every run configuration: a single vfmacc.vv at e32 that
; accumulates into the register holding %c.
1534 define void @fma_v4f32(ptr %x, ptr %y, ptr %z) {
1535 ; CHECK-LABEL: fma_v4f32:
1537 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1538 ; CHECK-NEXT: vle32.v v8, (a0)
1539 ; CHECK-NEXT: vle32.v v9, (a1)
1540 ; CHECK-NEXT: vle32.v v10, (a2)
1541 ; CHECK-NEXT: vfmacc.vv v10, v8, v9
1542 ; CHECK-NEXT: vse32.v v10, (a0)
1544 %a = load <4 x float>, ptr %x
1545 %b = load <4 x float>, ptr %y
1546 %c = load <4 x float>, ptr %z
1547 %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
1548 store <4 x float> %d, ptr %x
; llvm.fma(%a, %b, %c) on <2 x double>; the result is stored back through %x.
; Expected code on every run configuration: a single vfmacc.vv at e64 that
; accumulates into the register holding %c.
1552 define void @fma_v2f64(ptr %x, ptr %y, ptr %z) {
1553 ; CHECK-LABEL: fma_v2f64:
1555 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1556 ; CHECK-NEXT: vle64.v v8, (a0)
1557 ; CHECK-NEXT: vle64.v v9, (a1)
1558 ; CHECK-NEXT: vle64.v v10, (a2)
1559 ; CHECK-NEXT: vfmacc.vv v10, v8, v9
1560 ; CHECK-NEXT: vse64.v v10, (a0)
1562 %a = load <2 x double>, ptr %x
1563 %b = load <2 x double>, ptr %y
1564 %c = load <2 x double>, ptr %z
1565 %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
1566 store <2 x double> %d, ptr %x
; llvm.fma(%a, %b, -%c) on <8 x bfloat>; the result is stored back through %x.
; Expected code: the addend loaded from %z has its sign bit flipped with
; vxor.vx (0x8000 from `lui a1, 8`) while still 16 bits wide, then all three
; operands are widened (vfwcvtbf16.f.f.v), vfmadd.vv runs at e32/m2, and the
; result is narrowed back (vfncvtbf16.f.f.w).
1570 define void @fmsub_v8bf16(ptr %x, ptr %y, ptr %z) {
1571 ; CHECK-LABEL: fmsub_v8bf16:
1573 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1574 ; CHECK-NEXT: vle16.v v8, (a2)
1575 ; CHECK-NEXT: vle16.v v9, (a0)
1576 ; CHECK-NEXT: vle16.v v10, (a1)
1577 ; CHECK-NEXT: lui a1, 8
1578 ; CHECK-NEXT: vxor.vx v8, v8, a1
1579 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
1580 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
1581 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
1582 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1583 ; CHECK-NEXT: vfmadd.vv v8, v12, v14
1584 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1585 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
1586 ; CHECK-NEXT: vse16.v v10, (a0)
1588 %a = load <8 x bfloat>, ptr %x
1589 %b = load <8 x bfloat>, ptr %y
1590 %c = load <8 x bfloat>, ptr %z
1591 %neg = fneg <8 x bfloat> %c
1592 %d = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %neg)
1593 store <8 x bfloat> %d, ptr %x
; llvm.fma(%a, %b, -%c) on the non-power-of-two <6 x bfloat>; result stored to
; %x. Expected code: the loads use VL=6; the sign flip of the addend (vxor.vx
; with 0x8000), the vfwcvtbf16.f.f.v widens and the e32 vfmadd.vv run at VL=8;
; the narrow (vfncvtbf16.f.f.w) plus store switch back to VL=6.
1597 define void @fmsub_v6bf16(ptr %x, ptr %y, ptr %z) {
1598 ; CHECK-LABEL: fmsub_v6bf16:
1600 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1601 ; CHECK-NEXT: vle16.v v8, (a2)
1602 ; CHECK-NEXT: vle16.v v9, (a0)
1603 ; CHECK-NEXT: vle16.v v10, (a1)
1604 ; CHECK-NEXT: lui a1, 8
1605 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1606 ; CHECK-NEXT: vxor.vx v8, v8, a1
1607 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
1608 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
1609 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
1610 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1611 ; CHECK-NEXT: vfmadd.vv v8, v12, v14
1612 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1613 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
1614 ; CHECK-NEXT: vse16.v v10, (a0)
1616 %a = load <6 x bfloat>, ptr %x
1617 %b = load <6 x bfloat>, ptr %y
1618 %c = load <6 x bfloat>, ptr %z
1619 %neg = fneg <6 x bfloat> %c
1620 %d = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %neg)
1621 store <6 x bfloat> %d, ptr %x
; llvm.fma(%a, %b, -%c) on <8 x half>; the result is stored back through %x.
; With +zvfh the fneg is expected to fold into a single vfmsac.vv.
; With +zvfhmin the addend's sign bit is flipped with vxor.vx (0x8000), the
; operands are widened (vfwcvt.f.f.v), vfmadd.vv runs at e32/m2, and the
; result is narrowed back (vfncvt.f.f.w).
1625 define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
1626 ; ZVFH-LABEL: fmsub_v8f16:
1628 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1629 ; ZVFH-NEXT: vle16.v v8, (a0)
1630 ; ZVFH-NEXT: vle16.v v9, (a1)
1631 ; ZVFH-NEXT: vle16.v v10, (a2)
1632 ; ZVFH-NEXT: vfmsac.vv v10, v8, v9
1633 ; ZVFH-NEXT: vse16.v v10, (a0)
1636 ; ZVFHMIN-LABEL: fmsub_v8f16:
1638 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1639 ; ZVFHMIN-NEXT: vle16.v v8, (a2)
1640 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1641 ; ZVFHMIN-NEXT: vle16.v v10, (a1)
1642 ; ZVFHMIN-NEXT: lui a1, 8
1643 ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
1644 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
1645 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
1646 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
1647 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1648 ; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14
1649 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
1650 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
1651 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
1653 %a = load <8 x half>, ptr %x
1654 %b = load <8 x half>, ptr %y
1655 %c = load <8 x half>, ptr %z
1656 %neg = fneg <8 x half> %c
1657 %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
1658 store <8 x half> %d, ptr %x
; llvm.fma(%a, %b, -%c) on the non-power-of-two <6 x half>; result stored to
; %x. With +zvfh the expected code is a single vfmsac.vv at VL=6.
; With +zvfhmin the loads use VL=6; the sign flip (vxor.vx with 0x8000), the
; vfwcvt.f.f.v widens and the e32 vfmadd.vv run at VL=8; the narrow
; (vfncvt.f.f.w) plus store switch back to VL=6.
1662 define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
1663 ; ZVFH-LABEL: fmsub_v6f16:
1665 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1666 ; ZVFH-NEXT: vle16.v v8, (a0)
1667 ; ZVFH-NEXT: vle16.v v9, (a1)
1668 ; ZVFH-NEXT: vle16.v v10, (a2)
1669 ; ZVFH-NEXT: vfmsac.vv v10, v8, v9
1670 ; ZVFH-NEXT: vse16.v v10, (a0)
1673 ; ZVFHMIN-LABEL: fmsub_v6f16:
1675 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1676 ; ZVFHMIN-NEXT: vle16.v v8, (a2)
1677 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
1678 ; ZVFHMIN-NEXT: vle16.v v10, (a1)
1679 ; ZVFHMIN-NEXT: lui a1, 8
1680 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
1681 ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
1682 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
1683 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
1684 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
1685 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1686 ; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14
1687 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
1688 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
1689 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
1691 %a = load <6 x half>, ptr %x
1692 %b = load <6 x half>, ptr %y
1693 %c = load <6 x half>, ptr %z
1694 %neg = fneg <6 x half> %c
1695 %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
1696 store <6 x half> %d, ptr %x
; llvm.fma(-%a, %b, %c) on <4 x float> (first multiplicand negated); the
; result is stored back through %x.
; Expected code on every run configuration: the fneg folds into a single
; vfnmsac.vv at e32.
1700 define void @fnmsub_v4f32(ptr %x, ptr %y, ptr %z) {
1701 ; CHECK-LABEL: fnmsub_v4f32:
1703 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1704 ; CHECK-NEXT: vle32.v v8, (a0)
1705 ; CHECK-NEXT: vle32.v v9, (a1)
1706 ; CHECK-NEXT: vle32.v v10, (a2)
1707 ; CHECK-NEXT: vfnmsac.vv v10, v8, v9
1708 ; CHECK-NEXT: vse32.v v10, (a0)
1710 %a = load <4 x float>, ptr %x
1711 %b = load <4 x float>, ptr %y
1712 %c = load <4 x float>, ptr %z
1713 %neg = fneg <4 x float> %a
1714 %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
1715 store <4 x float> %d, ptr %x
; llvm.fma(%a, -%b, -%c) on <2 x double> (second multiplicand and addend both
; negated); the result is stored back through %x.
; Expected code on every run configuration: both fnegs fold into a single
; vfnmacc.vv at e64.
1719 define void @fnmadd_v2f64(ptr %x, ptr %y, ptr %z) {
1720 ; CHECK-LABEL: fnmadd_v2f64:
1722 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1723 ; CHECK-NEXT: vle64.v v8, (a0)
1724 ; CHECK-NEXT: vle64.v v9, (a1)
1725 ; CHECK-NEXT: vle64.v v10, (a2)
1726 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9
1727 ; CHECK-NEXT: vse64.v v10, (a0)
1729 %a = load <2 x double>, ptr %x
1730 %b = load <2 x double>, ptr %y
1731 %c = load <2 x double>, ptr %z
1732 %neg = fneg <2 x double> %b
1733 %neg2 = fneg <2 x double> %c
1734 %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
1735 store <2 x double> %d, ptr %x
; fadd on <16 x bfloat> (m2 at e16 in the expected code); the sum is stored
; back through %x.
; Expected code: both operands are widened with vfwcvtbf16.f.f.v, vfadd.vv
; runs at e32/m4, and the result is narrowed back with vfncvtbf16.f.f.w.
1739 define void @fadd_v16bf16(ptr %x, ptr %y) {
1740 ; CHECK-LABEL: fadd_v16bf16:
1742 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1743 ; CHECK-NEXT: vle16.v v8, (a1)
1744 ; CHECK-NEXT: vle16.v v10, (a0)
1745 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
1746 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
1747 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1748 ; CHECK-NEXT: vfadd.vv v8, v16, v12
1749 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1750 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
1751 ; CHECK-NEXT: vse16.v v12, (a0)
1753 %a = load <16 x bfloat>, ptr %x
1754 %b = load <16 x bfloat>, ptr %y
1755 %c = fadd <16 x bfloat> %a, %b
1756 store <16 x bfloat> %c, ptr %x
; fadd on <16 x half> (m2 at e16 in the expected code); the sum is stored back
; through %x.
; With +zvfh the expected code is a direct vfadd.vv at e16.
; With +zvfhmin both operands are widened (vfwcvt.f.f.v), vfadd.vv runs at
; e32/m4, and the result is narrowed back (vfncvt.f.f.w).
1760 define void @fadd_v16f16(ptr %x, ptr %y) {
1761 ; ZVFH-LABEL: fadd_v16f16:
1763 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1764 ; ZVFH-NEXT: vle16.v v8, (a0)
1765 ; ZVFH-NEXT: vle16.v v10, (a1)
1766 ; ZVFH-NEXT: vfadd.vv v8, v8, v10
1767 ; ZVFH-NEXT: vse16.v v8, (a0)
1770 ; ZVFHMIN-LABEL: fadd_v16f16:
1772 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1773 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
1774 ; ZVFHMIN-NEXT: vle16.v v10, (a0)
1775 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
1776 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
1777 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1778 ; ZVFHMIN-NEXT: vfadd.vv v8, v16, v12
1779 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1780 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
1781 ; ZVFHMIN-NEXT: vse16.v v12, (a0)
1783 %a = load <16 x half>, ptr %x
1784 %b = load <16 x half>, ptr %y
1785 %c = fadd <16 x half> %a, %b
1786 store <16 x half> %c, ptr %x
; fadd on <8 x float>; the sum is stored back through %x.
; Expected code on every run configuration: a direct vfadd.vv at e32/m2.
1790 define void @fadd_v8f32(ptr %x, ptr %y) {
1791 ; CHECK-LABEL: fadd_v8f32:
1793 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1794 ; CHECK-NEXT: vle32.v v8, (a0)
1795 ; CHECK-NEXT: vle32.v v10, (a1)
1796 ; CHECK-NEXT: vfadd.vv v8, v8, v10
1797 ; CHECK-NEXT: vse32.v v8, (a0)
1799 %a = load <8 x float>, ptr %x
1800 %b = load <8 x float>, ptr %y
1801 %c = fadd <8 x float> %a, %b
1802 store <8 x float> %c, ptr %x
; fadd on <4 x double>; the sum is stored back through %x.
; Expected code on every run configuration: a direct vfadd.vv at e64/m2.
1806 define void @fadd_v4f64(ptr %x, ptr %y) {
1807 ; CHECK-LABEL: fadd_v4f64:
1809 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1810 ; CHECK-NEXT: vle64.v v8, (a0)
1811 ; CHECK-NEXT: vle64.v v10, (a1)
1812 ; CHECK-NEXT: vfadd.vv v8, v8, v10
1813 ; CHECK-NEXT: vse64.v v8, (a0)
1815 %a = load <4 x double>, ptr %x
1816 %b = load <4 x double>, ptr %y
1817 %c = fadd <4 x double> %a, %b
1818 store <4 x double> %c, ptr %x
; fsub (%a - %b) on <16 x bfloat>; the difference is stored back through %x.
; Expected code: both operands are widened with vfwcvtbf16.f.f.v, vfsub.vv
; runs at e32/m4, and the result is narrowed back with vfncvtbf16.f.f.w.
1822 define void @fsub_v16bf16(ptr %x, ptr %y) {
1823 ; CHECK-LABEL: fsub_v16bf16:
1825 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1826 ; CHECK-NEXT: vle16.v v8, (a1)
1827 ; CHECK-NEXT: vle16.v v10, (a0)
1828 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
1829 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
1830 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1831 ; CHECK-NEXT: vfsub.vv v8, v16, v12
1832 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1833 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
1834 ; CHECK-NEXT: vse16.v v12, (a0)
1836 %a = load <16 x bfloat>, ptr %x
1837 %b = load <16 x bfloat>, ptr %y
1838 %c = fsub <16 x bfloat> %a, %b
1839 store <16 x bfloat> %c, ptr %x
; fsub (%a - %b) on <16 x half>; the difference is stored back through %x.
; With +zvfh the expected code is a direct vfsub.vv at e16/m2.
; With +zvfhmin both operands are widened (vfwcvt.f.f.v), vfsub.vv runs at
; e32/m4, and the result is narrowed back (vfncvt.f.f.w).
1843 define void @fsub_v16f16(ptr %x, ptr %y) {
1844 ; ZVFH-LABEL: fsub_v16f16:
1846 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1847 ; ZVFH-NEXT: vle16.v v8, (a0)
1848 ; ZVFH-NEXT: vle16.v v10, (a1)
1849 ; ZVFH-NEXT: vfsub.vv v8, v8, v10
1850 ; ZVFH-NEXT: vse16.v v8, (a0)
1853 ; ZVFHMIN-LABEL: fsub_v16f16:
1855 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1856 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
1857 ; ZVFHMIN-NEXT: vle16.v v10, (a0)
1858 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
1859 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
1860 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1861 ; ZVFHMIN-NEXT: vfsub.vv v8, v16, v12
1862 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1863 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
1864 ; ZVFHMIN-NEXT: vse16.v v12, (a0)
1866 %a = load <16 x half>, ptr %x
1867 %b = load <16 x half>, ptr %y
1868 %c = fsub <16 x half> %a, %b
1869 store <16 x half> %c, ptr %x
; fsub (%a - %b) on <8 x float>; the difference is stored back through %x.
; Expected code on every run configuration: a direct vfsub.vv at e32/m2.
1873 define void @fsub_v8f32(ptr %x, ptr %y) {
1874 ; CHECK-LABEL: fsub_v8f32:
1876 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1877 ; CHECK-NEXT: vle32.v v8, (a0)
1878 ; CHECK-NEXT: vle32.v v10, (a1)
1879 ; CHECK-NEXT: vfsub.vv v8, v8, v10
1880 ; CHECK-NEXT: vse32.v v8, (a0)
1882 %a = load <8 x float>, ptr %x
1883 %b = load <8 x float>, ptr %y
1884 %c = fsub <8 x float> %a, %b
1885 store <8 x float> %c, ptr %x
; fsub (%a - %b) on <4 x double>; the difference is stored back through %x.
; Expected code on every run configuration: a direct vfsub.vv at e64/m2.
1889 define void @fsub_v4f64(ptr %x, ptr %y) {
1890 ; CHECK-LABEL: fsub_v4f64:
1892 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1893 ; CHECK-NEXT: vle64.v v8, (a0)
1894 ; CHECK-NEXT: vle64.v v10, (a1)
1895 ; CHECK-NEXT: vfsub.vv v8, v8, v10
1896 ; CHECK-NEXT: vse64.v v8, (a0)
1898 %a = load <4 x double>, ptr %x
1899 %b = load <4 x double>, ptr %y
1900 %c = fsub <4 x double> %a, %b
1901 store <4 x double> %c, ptr %x
; fmul on <16 x bfloat>; the product is stored back through %x.
; Expected code: both operands are widened with vfwcvtbf16.f.f.v, vfmul.vv
; runs at e32/m4, and the result is narrowed back with vfncvtbf16.f.f.w.
1905 define void @fmul_v16bf16(ptr %x, ptr %y) {
1906 ; CHECK-LABEL: fmul_v16bf16:
1908 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1909 ; CHECK-NEXT: vle16.v v8, (a1)
1910 ; CHECK-NEXT: vle16.v v10, (a0)
1911 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
1912 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
1913 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1914 ; CHECK-NEXT: vfmul.vv v8, v16, v12
1915 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1916 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
1917 ; CHECK-NEXT: vse16.v v12, (a0)
1919 %a = load <16 x bfloat>, ptr %x
1920 %b = load <16 x bfloat>, ptr %y
1921 %c = fmul <16 x bfloat> %a, %b
1922 store <16 x bfloat> %c, ptr %x
; fmul on <16 x half>; the product is stored back through %x.
; With +zvfh the expected code is a direct vfmul.vv at e16/m2.
; With +zvfhmin both operands are widened (vfwcvt.f.f.v), vfmul.vv runs at
; e32/m4, and the result is narrowed back (vfncvt.f.f.w).
1926 define void @fmul_v16f16(ptr %x, ptr %y) {
1927 ; ZVFH-LABEL: fmul_v16f16:
1929 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1930 ; ZVFH-NEXT: vle16.v v8, (a0)
1931 ; ZVFH-NEXT: vle16.v v10, (a1)
1932 ; ZVFH-NEXT: vfmul.vv v8, v8, v10
1933 ; ZVFH-NEXT: vse16.v v8, (a0)
1936 ; ZVFHMIN-LABEL: fmul_v16f16:
1938 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1939 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
1940 ; ZVFHMIN-NEXT: vle16.v v10, (a0)
1941 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
1942 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
1943 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1944 ; ZVFHMIN-NEXT: vfmul.vv v8, v16, v12
1945 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1946 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
1947 ; ZVFHMIN-NEXT: vse16.v v12, (a0)
1949 %a = load <16 x half>, ptr %x
1950 %b = load <16 x half>, ptr %y
1951 %c = fmul <16 x half> %a, %b
1952 store <16 x half> %c, ptr %x
; fmul on <8 x float>; the product is stored back through %x.
; Expected code on every run configuration: a direct vfmul.vv at e32/m2.
1956 define void @fmul_v8f32(ptr %x, ptr %y) {
1957 ; CHECK-LABEL: fmul_v8f32:
1959 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
1960 ; CHECK-NEXT: vle32.v v8, (a0)
1961 ; CHECK-NEXT: vle32.v v10, (a1)
1962 ; CHECK-NEXT: vfmul.vv v8, v8, v10
1963 ; CHECK-NEXT: vse32.v v8, (a0)
1965 %a = load <8 x float>, ptr %x
1966 %b = load <8 x float>, ptr %y
1967 %c = fmul <8 x float> %a, %b
1968 store <8 x float> %c, ptr %x
; fmul on <4 x double>; the product is stored back through %x.
; Expected code on every run configuration: a direct vfmul.vv at e64/m2.
1972 define void @fmul_v4f64(ptr %x, ptr %y) {
1973 ; CHECK-LABEL: fmul_v4f64:
1975 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1976 ; CHECK-NEXT: vle64.v v8, (a0)
1977 ; CHECK-NEXT: vle64.v v10, (a1)
1978 ; CHECK-NEXT: vfmul.vv v8, v8, v10
1979 ; CHECK-NEXT: vse64.v v8, (a0)
1981 %a = load <4 x double>, ptr %x
1982 %b = load <4 x double>, ptr %y
1983 %c = fmul <4 x double> %a, %b
1984 store <4 x double> %c, ptr %x
; fdiv (%a / %b) on <16 x bfloat>; the quotient is stored back through %x.
; Expected code: both operands are widened with vfwcvtbf16.f.f.v, vfdiv.vv
; runs at e32/m4, and the result is narrowed back with vfncvtbf16.f.f.w.
1988 define void @fdiv_v16bf16(ptr %x, ptr %y) {
1989 ; CHECK-LABEL: fdiv_v16bf16:
1991 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
1992 ; CHECK-NEXT: vle16.v v8, (a1)
1993 ; CHECK-NEXT: vle16.v v10, (a0)
1994 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
1995 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
1996 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
1997 ; CHECK-NEXT: vfdiv.vv v8, v16, v12
1998 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
1999 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
2000 ; CHECK-NEXT: vse16.v v12, (a0)
2002 %a = load <16 x bfloat>, ptr %x
2003 %b = load <16 x bfloat>, ptr %y
2004 %c = fdiv <16 x bfloat> %a, %b
2005 store <16 x bfloat> %c, ptr %x
; fdiv (%a / %b) on <16 x half>; the quotient is stored back through %x.
; With +zvfh the expected code is a direct vfdiv.vv at e16/m2.
; With +zvfhmin both operands are widened (vfwcvt.f.f.v), vfdiv.vv runs at
; e32/m4, and the result is narrowed back (vfncvt.f.f.w).
2009 define void @fdiv_v16f16(ptr %x, ptr %y) {
2010 ; ZVFH-LABEL: fdiv_v16f16:
2012 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2013 ; ZVFH-NEXT: vle16.v v8, (a0)
2014 ; ZVFH-NEXT: vle16.v v10, (a1)
2015 ; ZVFH-NEXT: vfdiv.vv v8, v8, v10
2016 ; ZVFH-NEXT: vse16.v v8, (a0)
2019 ; ZVFHMIN-LABEL: fdiv_v16f16:
2021 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2022 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
2023 ; ZVFHMIN-NEXT: vle16.v v10, (a0)
2024 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
2025 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10
2026 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2027 ; ZVFHMIN-NEXT: vfdiv.vv v8, v16, v12
2028 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2029 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
2030 ; ZVFHMIN-NEXT: vse16.v v12, (a0)
2032 %a = load <16 x half>, ptr %x
2033 %b = load <16 x half>, ptr %y
2034 %c = fdiv <16 x half> %a, %b
2035 store <16 x half> %c, ptr %x
; fdiv (%a / %b) on <8 x float>; the quotient is stored back through %x.
; Expected code on every run configuration: a direct vfdiv.vv at e32/m2.
2039 define void @fdiv_v8f32(ptr %x, ptr %y) {
2040 ; CHECK-LABEL: fdiv_v8f32:
2042 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2043 ; CHECK-NEXT: vle32.v v8, (a0)
2044 ; CHECK-NEXT: vle32.v v10, (a1)
2045 ; CHECK-NEXT: vfdiv.vv v8, v8, v10
2046 ; CHECK-NEXT: vse32.v v8, (a0)
2048 %a = load <8 x float>, ptr %x
2049 %b = load <8 x float>, ptr %y
2050 %c = fdiv <8 x float> %a, %b
2051 store <8 x float> %c, ptr %x
; fdiv (%a / %b) on <4 x double>; the quotient is stored back through %x.
; Expected code on every run configuration: a direct vfdiv.vv at e64/m2.
2055 define void @fdiv_v4f64(ptr %x, ptr %y) {
2056 ; CHECK-LABEL: fdiv_v4f64:
2058 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2059 ; CHECK-NEXT: vle64.v v8, (a0)
2060 ; CHECK-NEXT: vle64.v v10, (a1)
2061 ; CHECK-NEXT: vfdiv.vv v8, v8, v10
2062 ; CHECK-NEXT: vse64.v v8, (a0)
2064 %a = load <4 x double>, ptr %x
2065 %b = load <4 x double>, ptr %y
2066 %c = fdiv <4 x double> %a, %b
2067 store <4 x double> %c, ptr %x
; fneg on <16 x bfloat>, loaded from and stored back to %x.
; Expected code: no floating-point instruction is used; the sign bit of each
; 16-bit element is flipped with vxor.vx against 0x8000 (`lui a1, 8`).
2071 define void @fneg_v16bf16(ptr %x) {
2072 ; CHECK-LABEL: fneg_v16bf16:
2074 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2075 ; CHECK-NEXT: vle16.v v8, (a0)
2076 ; CHECK-NEXT: lui a1, 8
2077 ; CHECK-NEXT: vxor.vx v8, v8, a1
2078 ; CHECK-NEXT: vse16.v v8, (a0)
2080 %a = load <16 x bfloat>, ptr %x
2081 %b = fneg <16 x bfloat> %a
2082 store <16 x bfloat> %b, ptr %x
; fneg on <16 x half>, loaded from and stored back to %x.
; With +zvfh the expected code is vfneg.v at e16/m2.
; With +zvfhmin the sign bit is flipped with an integer vxor.vx against
; 0x8000 (`lui a1, 8`) instead.
2086 define void @fneg_v16f16(ptr %x) {
2087 ; ZVFH-LABEL: fneg_v16f16:
2089 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2090 ; ZVFH-NEXT: vle16.v v8, (a0)
2091 ; ZVFH-NEXT: vfneg.v v8, v8
2092 ; ZVFH-NEXT: vse16.v v8, (a0)
2095 ; ZVFHMIN-LABEL: fneg_v16f16:
2097 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2098 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2099 ; ZVFHMIN-NEXT: lui a1, 8
2100 ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
2101 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
2103 %a = load <16 x half>, ptr %x
2104 %b = fneg <16 x half> %a
2105 store <16 x half> %b, ptr %x
; fneg on <8 x float>, loaded from and stored back to %x.
; Expected code on every run configuration: vfneg.v at e32/m2.
2109 define void @fneg_v8f32(ptr %x) {
2110 ; CHECK-LABEL: fneg_v8f32:
2112 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2113 ; CHECK-NEXT: vle32.v v8, (a0)
2114 ; CHECK-NEXT: vfneg.v v8, v8
2115 ; CHECK-NEXT: vse32.v v8, (a0)
2117 %a = load <8 x float>, ptr %x
2118 %b = fneg <8 x float> %a
2119 store <8 x float> %b, ptr %x
; fneg on <4 x double>, loaded from and stored back to %x.
; Expected code on every run configuration: vfneg.v at e64/m2.
2123 define void @fneg_v4f64(ptr %x) {
2124 ; CHECK-LABEL: fneg_v4f64:
2126 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2127 ; CHECK-NEXT: vle64.v v8, (a0)
2128 ; CHECK-NEXT: vfneg.v v8, v8
2129 ; CHECK-NEXT: vse64.v v8, (a0)
2131 %a = load <4 x double>, ptr %x
2132 %b = fneg <4 x double> %a
2133 store <4 x double> %b, ptr %x
; llvm.fma(%a, %b, %c) on <16 x bfloat>; the result is stored back through %x.
; Expected code: all three operands are widened with vfwcvtbf16.f.f.v, a
; single vfmadd.vv runs at e32/m4, and the result is narrowed back with
; vfncvtbf16.f.f.w.
2137 define void @fma_v16bf16(ptr %x, ptr %y, ptr %z) {
2138 ; CHECK-LABEL: fma_v16bf16:
2140 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2141 ; CHECK-NEXT: vle16.v v8, (a2)
2142 ; CHECK-NEXT: vle16.v v10, (a0)
2143 ; CHECK-NEXT: vle16.v v12, (a1)
2144 ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
2145 ; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10
2146 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12
2147 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2148 ; CHECK-NEXT: vfmadd.vv v8, v20, v16
2149 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2150 ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
2151 ; CHECK-NEXT: vse16.v v12, (a0)
2153 %a = load <16 x bfloat>, ptr %x
2154 %b = load <16 x bfloat>, ptr %y
2155 %c = load <16 x bfloat>, ptr %z
2156 %d = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b, <16 x bfloat> %c)
2157 store <16 x bfloat> %d, ptr %x
; llvm.fma on <16 x half> vectors loaded from %x, %y and %z; the result is
; stored back to %x. With +zvfh the expected code is one vfmacc.vv at e16.
; With only +zvfhmin the inputs are widened with vfwcvt.f.f.v, combined by
; vfmadd.vv at e32/m4, and narrowed back with vfncvt.f.f.w.
2161 define void @fma_v16f16(ptr %x, ptr %y, ptr %z) {
2162 ; ZVFH-LABEL: fma_v16f16:
2164 ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2165 ; ZVFH-NEXT: vle16.v v8, (a0)
2166 ; ZVFH-NEXT: vle16.v v10, (a1)
2167 ; ZVFH-NEXT: vle16.v v12, (a2)
2168 ; ZVFH-NEXT: vfmacc.vv v12, v8, v10
2169 ; ZVFH-NEXT: vse16.v v12, (a0)
2172 ; ZVFHMIN-LABEL: fma_v16f16:
2174 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
2175 ; ZVFHMIN-NEXT: vle16.v v8, (a2)
2176 ; ZVFHMIN-NEXT: vle16.v v10, (a0)
2177 ; ZVFHMIN-NEXT: vle16.v v12, (a1)
2178 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8
2179 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10
2180 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12
2181 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma
2182 ; ZVFHMIN-NEXT: vfmadd.vv v8, v20, v16
2183 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2184 ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8
2185 ; ZVFHMIN-NEXT: vse16.v v12, (a0)
2187 %a = load <16 x half>, ptr %x
2188 %b = load <16 x half>, ptr %y
2189 %c = load <16 x half>, ptr %z
2190 %d = call <16 x half> @llvm.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c)
2191 store <16 x half> %d, ptr %x
; llvm.fma on <8 x float> vectors loaded from %x, %y and %z; the result is
; stored back to %x. All run lines expect a single vfmacc.vv at e32/m2.
2195 define void @fma_v8f32(ptr %x, ptr %y, ptr %z) {
2196 ; CHECK-LABEL: fma_v8f32:
2198 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
2199 ; CHECK-NEXT: vle32.v v8, (a0)
2200 ; CHECK-NEXT: vle32.v v10, (a1)
2201 ; CHECK-NEXT: vle32.v v12, (a2)
2202 ; CHECK-NEXT: vfmacc.vv v12, v8, v10
2203 ; CHECK-NEXT: vse32.v v12, (a0)
2205 %a = load <8 x float>, ptr %x
2206 %b = load <8 x float>, ptr %y
2207 %c = load <8 x float>, ptr %z
2208 %d = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
2209 store <8 x float> %d, ptr %x
; llvm.fma on <4 x double> vectors loaded from %x, %y and %z; the result is
; stored back to %x. All run lines expect a single vfmacc.vv at e64/m2.
2213 define void @fma_v4f64(ptr %x, ptr %y, ptr %z) {
2214 ; CHECK-LABEL: fma_v4f64:
2216 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
2217 ; CHECK-NEXT: vle64.v v8, (a0)
2218 ; CHECK-NEXT: vle64.v v10, (a1)
2219 ; CHECK-NEXT: vle64.v v12, (a2)
2220 ; CHECK-NEXT: vfmacc.vv v12, v8, v10
2221 ; CHECK-NEXT: vse64.v v12, (a0)
2223 %a = load <4 x double>, ptr %x
2224 %b = load <4 x double>, ptr %y
2225 %c = load <4 x double>, ptr %z
2226 %d = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c)
2227 store <4 x double> %d, ptr %x
; fadd of <8 x bfloat> loaded from %x with a splat of scalar %y as the
; right-hand operand; the result is stored back to %x. All run lines expect
; the scalar to be moved to a1 (fmv.x.w), splatted with vmv.v.x, both vectors
; widened to e32, added with vfadd.vv, then narrowed back to e16.
2231 define void @fadd_vf_v8bf16(ptr %x, bfloat %y) {
2232 ; CHECK-LABEL: fadd_vf_v8bf16:
2234 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2235 ; CHECK-NEXT: vle16.v v8, (a0)
2236 ; CHECK-NEXT: fmv.x.w a1, fa0
2237 ; CHECK-NEXT: vmv.v.x v9, a1
2238 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2239 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2240 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2241 ; CHECK-NEXT: vfadd.vv v8, v10, v12
2242 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2243 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2244 ; CHECK-NEXT: vse16.v v10, (a0)
2246 %a = load <8 x bfloat>, ptr %x
2247 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2248 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2249 %d = fadd <8 x bfloat> %a, %c
2250 store <8 x bfloat> %d, ptr %x
; Non-power-of-two variant: fadd of <6 x bfloat> loaded from %x with a splat
; of scalar %y as the right-hand operand; the result is stored back to %x.
; The expected code is the widen / vfadd.vv / narrow sequence with an AVL of 6.
2254 define void @fadd_vf_v6bf16(ptr %x, bfloat %y) {
2255 ; CHECK-LABEL: fadd_vf_v6bf16:
2257 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2258 ; CHECK-NEXT: vle16.v v8, (a0)
2259 ; CHECK-NEXT: fmv.x.w a1, fa0
2260 ; CHECK-NEXT: vmv.v.x v9, a1
2261 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2262 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2263 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2264 ; CHECK-NEXT: vfadd.vv v8, v10, v12
2265 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2266 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2267 ; CHECK-NEXT: vse16.v v10, (a0)
2269 %a = load <6 x bfloat>, ptr %x
2270 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2271 %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2272 %d = fadd <6 x bfloat> %a, %c
2273 store <6 x bfloat> %d, ptr %x
; fadd of <8 x half> loaded from %x with a splat of scalar %y as the
; right-hand operand; the result is stored back to %x. With +zvfh the expected
; code is a single vfadd.vf taking fa0 directly. With only +zvfhmin the scalar
; is splatted via fmv.x.w + vmv.v.x and the add is done at e32 (vfwcvt.f.f.v,
; vfadd.vv, vfncvt.f.f.w).
2277 define void @fadd_vf_v8f16(ptr %x, half %y) {
2278 ; ZVFH-LABEL: fadd_vf_v8f16:
2280 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2281 ; ZVFH-NEXT: vle16.v v8, (a0)
2282 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
2283 ; ZVFH-NEXT: vse16.v v8, (a0)
2286 ; ZVFHMIN-LABEL: fadd_vf_v8f16:
2288 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2289 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2290 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2291 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
2292 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2293 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
2294 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2295 ; ZVFHMIN-NEXT: vfadd.vv v8, v10, v12
2296 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2297 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2298 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2300 %a = load <8 x half>, ptr %x
2301 %b = insertelement <8 x half> poison, half %y, i32 0
2302 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2303 %d = fadd <8 x half> %a, %c
2304 store <8 x half> %d, ptr %x
; Non-power-of-two variant: fadd of <6 x half> loaded from %x with a splat of
; scalar %y as the right-hand operand; the result is stored back to %x.
; With +zvfh the expected code is a single vfadd.vf; with only +zvfhmin it is
; the splat / widen / vfadd.vv / narrow sequence. Both use an AVL of 6.
2308 define void @fadd_vf_v6f16(ptr %x, half %y) {
2309 ; ZVFH-LABEL: fadd_vf_v6f16:
2311 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2312 ; ZVFH-NEXT: vle16.v v8, (a0)
2313 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
2314 ; ZVFH-NEXT: vse16.v v8, (a0)
2317 ; ZVFHMIN-LABEL: fadd_vf_v6f16:
2319 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2320 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2321 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2322 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
2323 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2324 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
2325 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2326 ; ZVFHMIN-NEXT: vfadd.vv v8, v10, v12
2327 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2328 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2329 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2331 %a = load <6 x half>, ptr %x
2332 %b = insertelement <6 x half> poison, half %y, i32 0
2333 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2334 %d = fadd <6 x half> %a, %c
2335 store <6 x half> %d, ptr %x
; fadd of <4 x float> loaded from %x with a splat of scalar %y as the
; right-hand operand; the result is stored back to %x.
; All run lines expect the splat to fold into a single vfadd.vf with fa0.
2339 define void @fadd_vf_v4f32(ptr %x, float %y) {
2340 ; CHECK-LABEL: fadd_vf_v4f32:
2342 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2343 ; CHECK-NEXT: vle32.v v8, (a0)
2344 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
2345 ; CHECK-NEXT: vse32.v v8, (a0)
2347 %a = load <4 x float>, ptr %x
2348 %b = insertelement <4 x float> poison, float %y, i32 0
2349 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2350 %d = fadd <4 x float> %a, %c
2351 store <4 x float> %d, ptr %x
; fadd of <2 x double> loaded from %x with a splat of scalar %y as the
; right-hand operand; the result is stored back to %x.
; All run lines expect the splat to fold into a single vfadd.vf with fa0.
2355 define void @fadd_vf_v2f64(ptr %x, double %y) {
2356 ; CHECK-LABEL: fadd_vf_v2f64:
2358 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2359 ; CHECK-NEXT: vle64.v v8, (a0)
2360 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
2361 ; CHECK-NEXT: vse64.v v8, (a0)
2363 %a = load <2 x double>, ptr %x
2364 %b = insertelement <2 x double> poison, double %y, i32 0
2365 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2366 %d = fadd <2 x double> %a, %c
2367 store <2 x double> %d, ptr %x
; fadd of <8 x bfloat> with the splat of scalar %y as the LEFT-hand operand
; and the vector loaded from %x on the right; the result is stored back to %x.
; The expected code widens both to e32 and issues vfadd.vv with the widened
; splat (v12) as the first source operand, then narrows back to e16.
2371 define void @fadd_fv_v8bf16(ptr %x, bfloat %y) {
2372 ; CHECK-LABEL: fadd_fv_v8bf16:
2374 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2375 ; CHECK-NEXT: vle16.v v8, (a0)
2376 ; CHECK-NEXT: fmv.x.w a1, fa0
2377 ; CHECK-NEXT: vmv.v.x v9, a1
2378 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2379 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2380 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2381 ; CHECK-NEXT: vfadd.vv v8, v12, v10
2382 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2383 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2384 ; CHECK-NEXT: vse16.v v10, (a0)
2386 %a = load <8 x bfloat>, ptr %x
2387 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2388 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2389 %d = fadd <8 x bfloat> %c, %a
2390 store <8 x bfloat> %d, ptr %x
; Non-power-of-two variant: fadd of <6 x bfloat> with the splat of scalar %y
; as the left-hand operand and the vector loaded from %x on the right; the
; result is stored back to %x. The expected code is the widen / vfadd.vv /
; narrow sequence with the widened splat first and an AVL of 6.
2394 define void @fadd_fv_v6bf16(ptr %x, bfloat %y) {
2395 ; CHECK-LABEL: fadd_fv_v6bf16:
2397 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2398 ; CHECK-NEXT: vle16.v v8, (a0)
2399 ; CHECK-NEXT: fmv.x.w a1, fa0
2400 ; CHECK-NEXT: vmv.v.x v9, a1
2401 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2402 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2403 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2404 ; CHECK-NEXT: vfadd.vv v8, v12, v10
2405 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2406 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2407 ; CHECK-NEXT: vse16.v v10, (a0)
2409 %a = load <6 x bfloat>, ptr %x
2410 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2411 %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2412 %d = fadd <6 x bfloat> %c, %a
2413 store <6 x bfloat> %d, ptr %x
; fadd of <8 x half> with the splat of scalar %y as the left-hand operand and
; the vector loaded from %x on the right; the result is stored back to %x.
; With +zvfh the expected code is still a single vfadd.vf with fa0. With only
; +zvfhmin the add is done at e32 by vfadd.vv with the widened splat (v12)
; as the first source operand.
2417 define void @fadd_fv_v8f16(ptr %x, half %y) {
2418 ; ZVFH-LABEL: fadd_fv_v8f16:
2420 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2421 ; ZVFH-NEXT: vle16.v v8, (a0)
2422 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
2423 ; ZVFH-NEXT: vse16.v v8, (a0)
2426 ; ZVFHMIN-LABEL: fadd_fv_v8f16:
2428 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2429 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2430 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2431 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
2432 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2433 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
2434 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2435 ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10
2436 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2437 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2438 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2440 %a = load <8 x half>, ptr %x
2441 %b = insertelement <8 x half> poison, half %y, i32 0
2442 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2443 %d = fadd <8 x half> %c, %a
2444 store <8 x half> %d, ptr %x
; Non-power-of-two variant: fadd of <6 x half> with the splat of scalar %y as
; the left-hand operand and the vector loaded from %x on the right; the result
; is stored back to %x. With +zvfh the expected code is a single vfadd.vf;
; with only +zvfhmin it is the splat / widen / vfadd.vv / narrow sequence.
; Both use an AVL of 6.
2448 define void @fadd_fv_v6f16(ptr %x, half %y) {
2449 ; ZVFH-LABEL: fadd_fv_v6f16:
2451 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2452 ; ZVFH-NEXT: vle16.v v8, (a0)
2453 ; ZVFH-NEXT: vfadd.vf v8, v8, fa0
2454 ; ZVFH-NEXT: vse16.v v8, (a0)
2457 ; ZVFHMIN-LABEL: fadd_fv_v6f16:
2459 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2460 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2461 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2462 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
2463 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2464 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
2465 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2466 ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10
2467 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2468 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2469 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2471 %a = load <6 x half>, ptr %x
2472 %b = insertelement <6 x half> poison, half %y, i32 0
2473 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2474 %d = fadd <6 x half> %c, %a
2475 store <6 x half> %d, ptr %x
; fadd of <4 x float> with the splat of scalar %y as the left-hand operand and
; the vector loaded from %x on the right; the result is stored back to %x.
; All run lines expect a single vfadd.vf with fa0, the same instruction as
; the splat-on-the-right form.
2479 define void @fadd_fv_v4f32(ptr %x, float %y) {
2480 ; CHECK-LABEL: fadd_fv_v4f32:
2482 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2483 ; CHECK-NEXT: vle32.v v8, (a0)
2484 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
2485 ; CHECK-NEXT: vse32.v v8, (a0)
2487 %a = load <4 x float>, ptr %x
2488 %b = insertelement <4 x float> poison, float %y, i32 0
2489 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2490 %d = fadd <4 x float> %c, %a
2491 store <4 x float> %d, ptr %x
; fadd of <2 x double> with the splat of scalar %y as the left-hand operand
; and the vector loaded from %x on the right; the result is stored back to %x.
; All run lines expect a single vfadd.vf with fa0.
2495 define void @fadd_fv_v2f64(ptr %x, double %y) {
2496 ; CHECK-LABEL: fadd_fv_v2f64:
2498 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2499 ; CHECK-NEXT: vle64.v v8, (a0)
2500 ; CHECK-NEXT: vfadd.vf v8, v8, fa0
2501 ; CHECK-NEXT: vse64.v v8, (a0)
2503 %a = load <2 x double>, ptr %x
2504 %b = insertelement <2 x double> poison, double %y, i32 0
2505 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2506 %d = fadd <2 x double> %c, %a
2507 store <2 x double> %d, ptr %x
; fsub of <8 x bfloat>: vector loaded from %x minus a splat of scalar %y; the
; result is stored back to %x. All run lines expect the scalar to be splatted
; via fmv.x.w + vmv.v.x, both vectors widened to e32, subtracted with vfsub.vv
; (vector first, splat second), then narrowed back to e16.
2511 define void @fsub_vf_v8bf16(ptr %x, bfloat %y) {
2512 ; CHECK-LABEL: fsub_vf_v8bf16:
2514 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2515 ; CHECK-NEXT: vle16.v v8, (a0)
2516 ; CHECK-NEXT: fmv.x.w a1, fa0
2517 ; CHECK-NEXT: vmv.v.x v9, a1
2518 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2519 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2520 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2521 ; CHECK-NEXT: vfsub.vv v8, v10, v12
2522 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2523 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2524 ; CHECK-NEXT: vse16.v v10, (a0)
2526 %a = load <8 x bfloat>, ptr %x
2527 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2528 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2529 %d = fsub <8 x bfloat> %a, %c
2530 store <8 x bfloat> %d, ptr %x
; Non-power-of-two variant: fsub of <6 x bfloat>, vector loaded from %x minus
; a splat of scalar %y; the result is stored back to %x. The expected code is
; the widen / vfsub.vv / narrow sequence with an AVL of 6.
2534 define void @fsub_vf_v6bf16(ptr %x, bfloat %y) {
2535 ; CHECK-LABEL: fsub_vf_v6bf16:
2537 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2538 ; CHECK-NEXT: vle16.v v8, (a0)
2539 ; CHECK-NEXT: fmv.x.w a1, fa0
2540 ; CHECK-NEXT: vmv.v.x v9, a1
2541 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2542 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2543 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2544 ; CHECK-NEXT: vfsub.vv v8, v10, v12
2545 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2546 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2547 ; CHECK-NEXT: vse16.v v10, (a0)
2549 %a = load <6 x bfloat>, ptr %x
2550 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2551 %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2552 %d = fsub <6 x bfloat> %a, %c
2553 store <6 x bfloat> %d, ptr %x
; fsub of <8 x half>: vector loaded from %x minus a splat of scalar %y; the
; result is stored back to %x. With +zvfh the expected code is a single
; vfsub.vf with fa0. With only +zvfhmin the scalar is splatted via
; fmv.x.w + vmv.v.x and the subtract is done at e32 (vfwcvt.f.f.v, vfsub.vv,
; vfncvt.f.f.w).
2557 define void @fsub_vf_v8f16(ptr %x, half %y) {
2558 ; ZVFH-LABEL: fsub_vf_v8f16:
2560 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2561 ; ZVFH-NEXT: vle16.v v8, (a0)
2562 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
2563 ; ZVFH-NEXT: vse16.v v8, (a0)
2566 ; ZVFHMIN-LABEL: fsub_vf_v8f16:
2568 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2569 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2570 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2571 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
2572 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2573 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
2574 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2575 ; ZVFHMIN-NEXT: vfsub.vv v8, v10, v12
2576 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2577 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2578 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2580 %a = load <8 x half>, ptr %x
2581 %b = insertelement <8 x half> poison, half %y, i32 0
2582 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2583 %d = fsub <8 x half> %a, %c
2584 store <8 x half> %d, ptr %x
; Non-power-of-two variant: fsub of <6 x half>, vector loaded from %x minus a
; splat of scalar %y; the result is stored back to %x. With +zvfh the expected
; code is a single vfsub.vf; with only +zvfhmin it is the splat / widen /
; vfsub.vv / narrow sequence. Both use an AVL of 6.
2588 define void @fsub_vf_v6f16(ptr %x, half %y) {
2589 ; ZVFH-LABEL: fsub_vf_v6f16:
2591 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2592 ; ZVFH-NEXT: vle16.v v8, (a0)
2593 ; ZVFH-NEXT: vfsub.vf v8, v8, fa0
2594 ; ZVFH-NEXT: vse16.v v8, (a0)
2597 ; ZVFHMIN-LABEL: fsub_vf_v6f16:
2599 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2600 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2601 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2602 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
2603 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2604 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
2605 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2606 ; ZVFHMIN-NEXT: vfsub.vv v8, v10, v12
2607 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2608 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2609 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2611 %a = load <6 x half>, ptr %x
2612 %b = insertelement <6 x half> poison, half %y, i32 0
2613 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2614 %d = fsub <6 x half> %a, %c
2615 store <6 x half> %d, ptr %x
; fsub of <4 x float>: vector loaded from %x minus a splat of scalar %y; the
; result is stored back to %x.
; All run lines expect the splat to fold into a single vfsub.vf with fa0.
2619 define void @fsub_vf_v4f32(ptr %x, float %y) {
2620 ; CHECK-LABEL: fsub_vf_v4f32:
2622 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2623 ; CHECK-NEXT: vle32.v v8, (a0)
2624 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
2625 ; CHECK-NEXT: vse32.v v8, (a0)
2627 %a = load <4 x float>, ptr %x
2628 %b = insertelement <4 x float> poison, float %y, i32 0
2629 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2630 %d = fsub <4 x float> %a, %c
2631 store <4 x float> %d, ptr %x
; fsub of <2 x double>: vector loaded from %x minus a splat of scalar %y; the
; result is stored back to %x.
; All run lines expect the splat to fold into a single vfsub.vf with fa0.
2635 define void @fsub_vf_v2f64(ptr %x, double %y) {
2636 ; CHECK-LABEL: fsub_vf_v2f64:
2638 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2639 ; CHECK-NEXT: vle64.v v8, (a0)
2640 ; CHECK-NEXT: vfsub.vf v8, v8, fa0
2641 ; CHECK-NEXT: vse64.v v8, (a0)
2643 %a = load <2 x double>, ptr %x
2644 %b = insertelement <2 x double> poison, double %y, i32 0
2645 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2646 %d = fsub <2 x double> %a, %c
2647 store <2 x double> %d, ptr %x
; Reversed fsub of <8 x bfloat>: splat of scalar %y minus the vector loaded
; from %x; the result is stored back to %x. The expected code widens both to
; e32 and issues vfsub.vv with the widened splat (v12) as the first source
; operand, then narrows back to e16.
2651 define void @fsub_fv_v8bf16(ptr %x, bfloat %y) {
2652 ; CHECK-LABEL: fsub_fv_v8bf16:
2654 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2655 ; CHECK-NEXT: vle16.v v8, (a0)
2656 ; CHECK-NEXT: fmv.x.w a1, fa0
2657 ; CHECK-NEXT: vmv.v.x v9, a1
2658 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2659 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2660 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2661 ; CHECK-NEXT: vfsub.vv v8, v12, v10
2662 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2663 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2664 ; CHECK-NEXT: vse16.v v10, (a0)
2666 %a = load <8 x bfloat>, ptr %x
2667 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2668 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2669 %d = fsub <8 x bfloat> %c, %a
2670 store <8 x bfloat> %d, ptr %x
; Non-power-of-two variant of the reversed fsub: splat of scalar %y minus the
; <6 x bfloat> vector loaded from %x; the result is stored back to %x. The
; expected code is the widen / vfsub.vv / narrow sequence with the widened
; splat first and an AVL of 6.
2674 define void @fsub_fv_v6bf16(ptr %x, bfloat %y) {
2675 ; CHECK-LABEL: fsub_fv_v6bf16:
2677 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2678 ; CHECK-NEXT: vle16.v v8, (a0)
2679 ; CHECK-NEXT: fmv.x.w a1, fa0
2680 ; CHECK-NEXT: vmv.v.x v9, a1
2681 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2682 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2683 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2684 ; CHECK-NEXT: vfsub.vv v8, v12, v10
2685 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2686 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2687 ; CHECK-NEXT: vse16.v v10, (a0)
2689 %a = load <6 x bfloat>, ptr %x
2690 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2691 %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2692 %d = fsub <6 x bfloat> %c, %a
2693 store <6 x bfloat> %d, ptr %x
; Reversed fsub of <8 x half>: splat of scalar %y minus the vector loaded from
; %x; the result is stored back to %x. With +zvfh the expected code is a
; single vfrsub.vf (reverse subtract) with fa0. With only +zvfhmin the
; subtract is done at e32 by vfsub.vv with the widened splat (v12) first.
2697 define void @fsub_fv_v8f16(ptr %x, half %y) {
2698 ; ZVFH-LABEL: fsub_fv_v8f16:
2700 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2701 ; ZVFH-NEXT: vle16.v v8, (a0)
2702 ; ZVFH-NEXT: vfrsub.vf v8, v8, fa0
2703 ; ZVFH-NEXT: vse16.v v8, (a0)
2706 ; ZVFHMIN-LABEL: fsub_fv_v8f16:
2708 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2709 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2710 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2711 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
2712 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2713 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
2714 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2715 ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
2716 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2717 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2718 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2720 %a = load <8 x half>, ptr %x
2721 %b = insertelement <8 x half> poison, half %y, i32 0
2722 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2723 %d = fsub <8 x half> %c, %a
2724 store <8 x half> %d, ptr %x
; Non-power-of-two variant of the reversed fsub: splat of scalar %y minus the
; <6 x half> vector loaded from %x; the result is stored back to %x. With
; +zvfh the expected code is a single vfrsub.vf; with only +zvfhmin it is the
; splat / widen / vfsub.vv / narrow sequence. Both use an AVL of 6.
2728 define void @fsub_fv_v6f16(ptr %x, half %y) {
2729 ; ZVFH-LABEL: fsub_fv_v6f16:
2731 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2732 ; ZVFH-NEXT: vle16.v v8, (a0)
2733 ; ZVFH-NEXT: vfrsub.vf v8, v8, fa0
2734 ; ZVFH-NEXT: vse16.v v8, (a0)
2737 ; ZVFHMIN-LABEL: fsub_fv_v6f16:
2739 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2740 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2741 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2742 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
2743 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2744 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
2745 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2746 ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
2747 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2748 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2749 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2751 %a = load <6 x half>, ptr %x
2752 %b = insertelement <6 x half> poison, half %y, i32 0
2753 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2754 %d = fsub <6 x half> %c, %a
2755 store <6 x half> %d, ptr %x
; Reversed fsub of <4 x float>: splat of scalar %y minus the vector loaded
; from %x; the result is stored back to %x.
; All run lines expect a single vfrsub.vf (reverse subtract) with fa0.
2759 define void @fsub_fv_v4f32(ptr %x, float %y) {
2760 ; CHECK-LABEL: fsub_fv_v4f32:
2762 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2763 ; CHECK-NEXT: vle32.v v8, (a0)
2764 ; CHECK-NEXT: vfrsub.vf v8, v8, fa0
2765 ; CHECK-NEXT: vse32.v v8, (a0)
2767 %a = load <4 x float>, ptr %x
2768 %b = insertelement <4 x float> poison, float %y, i32 0
2769 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2770 %d = fsub <4 x float> %c, %a
2771 store <4 x float> %d, ptr %x
; Reversed fsub of <2 x double>: splat of scalar %y minus the vector loaded
; from %x; the result is stored back to %x.
; All run lines expect a single vfrsub.vf (reverse subtract) with fa0.
2775 define void @fsub_fv_v2f64(ptr %x, double %y) {
2776 ; CHECK-LABEL: fsub_fv_v2f64:
2778 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2779 ; CHECK-NEXT: vle64.v v8, (a0)
2780 ; CHECK-NEXT: vfrsub.vf v8, v8, fa0
2781 ; CHECK-NEXT: vse64.v v8, (a0)
2783 %a = load <2 x double>, ptr %x
2784 %b = insertelement <2 x double> poison, double %y, i32 0
2785 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2786 %d = fsub <2 x double> %c, %a
2787 store <2 x double> %d, ptr %x
; fmul of <8 x bfloat> loaded from %x with a splat of scalar %y as the
; right-hand operand; the result is stored back to %x. All run lines expect
; the scalar to be splatted via fmv.x.w + vmv.v.x, both vectors widened to
; e32, multiplied with vfmul.vv, then narrowed back to e16.
2791 define void @fmul_vf_v8bf16(ptr %x, bfloat %y) {
2792 ; CHECK-LABEL: fmul_vf_v8bf16:
2794 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2795 ; CHECK-NEXT: vle16.v v8, (a0)
2796 ; CHECK-NEXT: fmv.x.w a1, fa0
2797 ; CHECK-NEXT: vmv.v.x v9, a1
2798 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2799 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2800 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2801 ; CHECK-NEXT: vfmul.vv v8, v10, v12
2802 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2803 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2804 ; CHECK-NEXT: vse16.v v10, (a0)
2806 %a = load <8 x bfloat>, ptr %x
2807 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2808 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2809 %d = fmul <8 x bfloat> %a, %c
2810 store <8 x bfloat> %d, ptr %x
; Non-power-of-two variant: fmul of <6 x bfloat> loaded from %x with a splat
; of scalar %y as the right-hand operand; the result is stored back to %x.
; The expected code is the widen / vfmul.vv / narrow sequence with an AVL of 6.
2814 define void @fmul_vf_v6bf16(ptr %x, bfloat %y) {
2815 ; CHECK-LABEL: fmul_vf_v6bf16:
2817 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2818 ; CHECK-NEXT: vle16.v v8, (a0)
2819 ; CHECK-NEXT: fmv.x.w a1, fa0
2820 ; CHECK-NEXT: vmv.v.x v9, a1
2821 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2822 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2823 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2824 ; CHECK-NEXT: vfmul.vv v8, v10, v12
2825 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2826 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2827 ; CHECK-NEXT: vse16.v v10, (a0)
2829 %a = load <6 x bfloat>, ptr %x
2830 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2831 %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2832 %d = fmul <6 x bfloat> %a, %c
2833 store <6 x bfloat> %d, ptr %x
; fmul of <8 x half> loaded from %x with a splat of scalar %y as the
; right-hand operand; the result is stored back to %x. With +zvfh the expected
; code is a single vfmul.vf with fa0. With only +zvfhmin the scalar is
; splatted via fmv.x.w + vmv.v.x and the multiply is done at e32
; (vfwcvt.f.f.v, vfmul.vv, vfncvt.f.f.w).
2837 define void @fmul_vf_v8f16(ptr %x, half %y) {
2838 ; ZVFH-LABEL: fmul_vf_v8f16:
2840 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2841 ; ZVFH-NEXT: vle16.v v8, (a0)
2842 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
2843 ; ZVFH-NEXT: vse16.v v8, (a0)
2846 ; ZVFHMIN-LABEL: fmul_vf_v8f16:
2848 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2849 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2850 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2851 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
2852 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2853 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
2854 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2855 ; ZVFHMIN-NEXT: vfmul.vv v8, v10, v12
2856 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2857 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2858 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2860 %a = load <8 x half>, ptr %x
2861 %b = insertelement <8 x half> poison, half %y, i32 0
2862 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
2863 %d = fmul <8 x half> %a, %c
2864 store <8 x half> %d, ptr %x
; Non-power-of-two variant: fmul of <6 x half> loaded from %x with a splat of
; scalar %y as the right-hand operand; the result is stored back to %x.
; With +zvfh the expected code is a single vfmul.vf; with only +zvfhmin it is
; the splat / widen / vfmul.vv / narrow sequence. Both use an AVL of 6.
2868 define void @fmul_vf_v6f16(ptr %x, half %y) {
2869 ; ZVFH-LABEL: fmul_vf_v6f16:
2871 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2872 ; ZVFH-NEXT: vle16.v v8, (a0)
2873 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
2874 ; ZVFH-NEXT: vse16.v v8, (a0)
2877 ; ZVFHMIN-LABEL: fmul_vf_v6f16:
2879 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2880 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2881 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2882 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
2883 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2884 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
2885 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2886 ; ZVFHMIN-NEXT: vfmul.vv v8, v10, v12
2887 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2888 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2889 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
2891 %a = load <6 x half>, ptr %x
2892 %b = insertelement <6 x half> poison, half %y, i32 0
2893 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
2894 %d = fmul <6 x half> %a, %c
2895 store <6 x half> %d, ptr %x
; fmul of <4 x float> loaded from %x with a splat of scalar %y as the
; right-hand operand; the result is stored back to %x.
; All run lines expect the splat to fold into a single vfmul.vf with fa0.
2899 define void @fmul_vf_v4f32(ptr %x, float %y) {
2900 ; CHECK-LABEL: fmul_vf_v4f32:
2902 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
2903 ; CHECK-NEXT: vle32.v v8, (a0)
2904 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
2905 ; CHECK-NEXT: vse32.v v8, (a0)
2907 %a = load <4 x float>, ptr %x
2908 %b = insertelement <4 x float> poison, float %y, i32 0
2909 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
2910 %d = fmul <4 x float> %a, %c
2911 store <4 x float> %d, ptr %x
; fmul of <2 x double> loaded from %x with a splat of scalar %y as the
; right-hand operand; the result is stored back to %x.
; All run lines expect the splat to fold into a single vfmul.vf with fa0.
2915 define void @fmul_vf_v2f64(ptr %x, double %y) {
2916 ; CHECK-LABEL: fmul_vf_v2f64:
2918 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
2919 ; CHECK-NEXT: vle64.v v8, (a0)
2920 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
2921 ; CHECK-NEXT: vse64.v v8, (a0)
2923 %a = load <2 x double>, ptr %x
2924 %b = insertelement <2 x double> poison, double %y, i32 0
2925 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
2926 %d = fmul <2 x double> %a, %c
2927 store <2 x double> %d, ptr %x
; fmul of <8 x bfloat> with the splat of scalar %y as the left-hand operand
; and the vector loaded from %x on the right; the result is stored back to %x.
; The expected code widens both to e32 and issues vfmul.vv with the widened
; splat (v12) as the first source operand, then narrows back to e16.
2931 define void @fmul_fv_v8bf16(ptr %x, bfloat %y) {
2932 ; CHECK-LABEL: fmul_fv_v8bf16:
2934 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2935 ; CHECK-NEXT: vle16.v v8, (a0)
2936 ; CHECK-NEXT: fmv.x.w a1, fa0
2937 ; CHECK-NEXT: vmv.v.x v9, a1
2938 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2939 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2940 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2941 ; CHECK-NEXT: vfmul.vv v8, v12, v10
2942 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2943 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2944 ; CHECK-NEXT: vse16.v v10, (a0)
2946 %a = load <8 x bfloat>, ptr %x
2947 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
2948 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
2949 %d = fmul <8 x bfloat> %c, %a
2950 store <8 x bfloat> %d, ptr %x
; Non-power-of-two variant: fmul of <6 x bfloat> with the splat of scalar %y
; as the left-hand operand and the vector loaded from %x on the right; the
; result is stored back to %x. The expected code is the widen / vfmul.vv /
; narrow sequence with the widened splat first and an AVL of 6.
2954 define void @fmul_fv_v6bf16(ptr %x, bfloat %y) {
2955 ; CHECK-LABEL: fmul_fv_v6bf16:
2957 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
2958 ; CHECK-NEXT: vle16.v v8, (a0)
2959 ; CHECK-NEXT: fmv.x.w a1, fa0
2960 ; CHECK-NEXT: vmv.v.x v9, a1
2961 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
2962 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
2963 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2964 ; CHECK-NEXT: vfmul.vv v8, v12, v10
2965 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2966 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
2967 ; CHECK-NEXT: vse16.v v10, (a0)
2969 %a = load <6 x bfloat>, ptr %x
2970 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
2971 %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
2972 %d = fmul <6 x bfloat> %c, %a
2973 store <6 x bfloat> %d, ptr %x
; fmul of <8 x half> with the splat of scalar %y as the left-hand operand and
; the vector loaded from %x on the right; the result is stored back to %x.
; With +zvfh the expected code is still a single vfmul.vf with fa0. With only
; +zvfhmin the multiply is done at e32 by vfmul.vv with the widened splat
; (v12) as the first source operand.
2977 define void @fmul_fv_v8f16(ptr %x, half %y) {
2978 ; ZVFH-LABEL: fmul_fv_v8f16:
2980 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2981 ; ZVFH-NEXT: vle16.v v8, (a0)
2982 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
2983 ; ZVFH-NEXT: vse16.v v8, (a0)
2986 ; ZVFHMIN-LABEL: fmul_fv_v8f16:
2988 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
2989 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
2990 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
2991 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
2992 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
2993 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
2994 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
2995 ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
2996 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2997 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
2998 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3000 %a = load <8 x half>, ptr %x
3001 %b = insertelement <8 x half> poison, half %y, i32 0
3002 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3003 %d = fmul <8 x half> %c, %a
3004 store <8 x half> %d, ptr %x
; Non-power-of-two variant: fmul of <6 x half> with the splat of scalar %y as
; the left-hand operand and the vector loaded from %x on the right; the result
; is stored back to %x. With +zvfh the expected code is a single vfmul.vf;
; with only +zvfhmin it is the splat / widen / vfmul.vv / narrow sequence.
; Both use an AVL of 6.
3008 define void @fmul_fv_v6f16(ptr %x, half %y) {
3009 ; ZVFH-LABEL: fmul_fv_v6f16:
3011 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3012 ; ZVFH-NEXT: vle16.v v8, (a0)
3013 ; ZVFH-NEXT: vfmul.vf v8, v8, fa0
3014 ; ZVFH-NEXT: vse16.v v8, (a0)
3017 ; ZVFHMIN-LABEL: fmul_fv_v6f16:
3019 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3020 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3021 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
3022 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
3023 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
3024 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
3025 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3026 ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
3027 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3028 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
3029 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3031 %a = load <6 x half>, ptr %x
3032 %b = insertelement <6 x half> poison, half %y, i32 0
3033 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3034 %d = fmul <6 x half> %c, %a
3035 store <6 x half> %d, ptr %x
; fmul of <4 x float> with the splat of scalar %y as the left-hand operand and
; the vector loaded from %x on the right; the result is stored back to %x.
; All run lines expect a single vfmul.vf with fa0, the same instruction as
; the splat-on-the-right form.
3039 define void @fmul_fv_v4f32(ptr %x, float %y) {
3040 ; CHECK-LABEL: fmul_fv_v4f32:
3042 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3043 ; CHECK-NEXT: vle32.v v8, (a0)
3044 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
3045 ; CHECK-NEXT: vse32.v v8, (a0)
3047 %a = load <4 x float>, ptr %x
3048 %b = insertelement <4 x float> poison, float %y, i32 0
3049 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3050 %d = fmul <4 x float> %c, %a
3051 store <4 x float> %d, ptr %x
; fmul of <2 x double> with the splat of scalar %y as the left-hand operand
; and the vector loaded from %x on the right; the result is stored back to %x.
; All run lines expect a single vfmul.vf with fa0.
3055 define void @fmul_fv_v2f64(ptr %x, double %y) {
3056 ; CHECK-LABEL: fmul_fv_v2f64:
3058 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3059 ; CHECK-NEXT: vle64.v v8, (a0)
3060 ; CHECK-NEXT: vfmul.vf v8, v8, fa0
3061 ; CHECK-NEXT: vse64.v v8, (a0)
3063 %a = load <2 x double>, ptr %x
3064 %b = insertelement <2 x double> poison, double %y, i32 0
3065 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3066 %d = fmul <2 x double> %c, %a
3067 store <2 x double> %d, ptr %x
; fdiv of <8 x bfloat>: vector loaded from %x divided by a splat of scalar %y;
; the result is stored back to %x. All run lines expect the scalar to be
; splatted via fmv.x.w + vmv.v.x, both vectors widened to e32, divided with
; vfdiv.vv (vector first, splat second), then narrowed back to e16.
3071 define void @fdiv_vf_v8bf16(ptr %x, bfloat %y) {
3072 ; CHECK-LABEL: fdiv_vf_v8bf16:
3074 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3075 ; CHECK-NEXT: vle16.v v8, (a0)
3076 ; CHECK-NEXT: fmv.x.w a1, fa0
3077 ; CHECK-NEXT: vmv.v.x v9, a1
3078 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
3079 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
3080 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3081 ; CHECK-NEXT: vfdiv.vv v8, v10, v12
3082 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3083 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
3084 ; CHECK-NEXT: vse16.v v10, (a0)
3086 %a = load <8 x bfloat>, ptr %x
3087 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
3088 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
3089 %d = fdiv <8 x bfloat> %a, %c
3090 store <8 x bfloat> %d, ptr %x
; Non-power-of-two variant: fdiv of <6 x bfloat>, vector loaded from %x
; divided by a splat of scalar %y; the result is stored back to %x. The
; expected code is the widen / vfdiv.vv / narrow sequence with an AVL of 6.
3094 define void @fdiv_vf_v6bf16(ptr %x, bfloat %y) {
3095 ; CHECK-LABEL: fdiv_vf_v6bf16:
3097 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3098 ; CHECK-NEXT: vle16.v v8, (a0)
3099 ; CHECK-NEXT: fmv.x.w a1, fa0
3100 ; CHECK-NEXT: vmv.v.x v9, a1
3101 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
3102 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
3103 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3104 ; CHECK-NEXT: vfdiv.vv v8, v10, v12
3105 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3106 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
3107 ; CHECK-NEXT: vse16.v v10, (a0)
3109 %a = load <6 x bfloat>, ptr %x
3110 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
3111 %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
3112 %d = fdiv <6 x bfloat> %a, %c
3113 store <6 x bfloat> %d, ptr %x
; Divides the <8 x half> loaded from %x by a splat of the scalar %y
; (vector / scalar) and stores the quotient back to %x.
; Two expected outputs are recorded:
;  - with +zvfh, a single vfdiv.vf taking fa0;
;  - with only +zvfhmin, the scalar is splatted via vmv.v.x, both operands are
;    widened with vfwcvt.f.f.v, divided with vfdiv.vv at e32/m2 and narrowed
;    back with vfncvt.f.f.w.
3117 define void @fdiv_vf_v8f16(ptr %x, half %y) {
3118 ; ZVFH-LABEL: fdiv_vf_v8f16:
3120 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3121 ; ZVFH-NEXT: vle16.v v8, (a0)
3122 ; ZVFH-NEXT: vfdiv.vf v8, v8, fa0
3123 ; ZVFH-NEXT: vse16.v v8, (a0)
3126 ; ZVFHMIN-LABEL: fdiv_vf_v8f16:
3128 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3129 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3130 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
3131 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
3132 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
3133 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
3134 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3135 ; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v12
3136 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3137 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
3138 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3140 %a = load <8 x half>, ptr %x
3141 %b = insertelement <8 x half> poison, half %y, i32 0
3142 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3143 %d = fdiv <8 x half> %a, %c
3144 store <8 x half> %d, ptr %x
; Non-power-of-two variant: divides the <6 x half> loaded from %x by a splat of
; the scalar %y (vector / scalar) and stores the quotient back to %x.
; Two expected outputs are recorded, both with VL = 6:
;  - with +zvfh, a single vfdiv.vf taking fa0;
;  - with only +zvfhmin, splat via vmv.v.x, widening of both operands with
;    vfwcvt.f.f.v, vfdiv.vv at e32/m2, then narrowing with vfncvt.f.f.w.
3148 define void @fdiv_vf_v6f16(ptr %x, half %y) {
3149 ; ZVFH-LABEL: fdiv_vf_v6f16:
3151 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3152 ; ZVFH-NEXT: vle16.v v8, (a0)
3153 ; ZVFH-NEXT: vfdiv.vf v8, v8, fa0
3154 ; ZVFH-NEXT: vse16.v v8, (a0)
3157 ; ZVFHMIN-LABEL: fdiv_vf_v6f16:
3159 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3160 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3161 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
3162 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
3163 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
3164 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
3165 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3166 ; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v12
3167 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3168 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
3169 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3171 %a = load <6 x half>, ptr %x
3172 %b = insertelement <6 x half> poison, half %y, i32 0
3173 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3174 %d = fdiv <6 x half> %a, %c
3175 store <6 x half> %d, ptr %x
; Divides the <4 x float> loaded from %x by a splat of the scalar %y
; (vector / scalar) and stores the quotient back to %x. The expected assembly
; is a single vfdiv.vf taking fa0.
3179 define void @fdiv_vf_v4f32(ptr %x, float %y) {
3180 ; CHECK-LABEL: fdiv_vf_v4f32:
3182 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3183 ; CHECK-NEXT: vle32.v v8, (a0)
3184 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0
3185 ; CHECK-NEXT: vse32.v v8, (a0)
3187 %a = load <4 x float>, ptr %x
3188 %b = insertelement <4 x float> poison, float %y, i32 0
3189 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3190 %d = fdiv <4 x float> %a, %c
3191 store <4 x float> %d, ptr %x
; Divides the <2 x double> loaded from %x by a splat of the scalar %y
; (vector / scalar) and stores the quotient back to %x. The expected assembly
; is a single vfdiv.vf at e64 taking fa0.
3195 define void @fdiv_vf_v2f64(ptr %x, double %y) {
3196 ; CHECK-LABEL: fdiv_vf_v2f64:
3198 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3199 ; CHECK-NEXT: vle64.v v8, (a0)
3200 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0
3201 ; CHECK-NEXT: vse64.v v8, (a0)
3203 %a = load <2 x double>, ptr %x
3204 %b = insertelement <2 x double> poison, double %y, i32 0
3205 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3206 %d = fdiv <2 x double> %a, %c
3207 store <2 x double> %d, ptr %x
; Divides a splat of the scalar %y by the <8 x bfloat> loaded from %x
; (scalar / vector) and stores the quotient back to %x.
; Expected assembly (shared by all run lines): splat via vmv.v.x, both operands
; widened with vfwcvtbf16.f.f.v, then vfdiv.vv at e32/m2 with the widened splat
; (v12) as the dividend and the widened load (v10) as the divisor, and finally
; narrowing with vfncvtbf16.f.f.w.
3211 define void @fdiv_fv_v8bf16(ptr %x, bfloat %y) {
3212 ; CHECK-LABEL: fdiv_fv_v8bf16:
3214 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3215 ; CHECK-NEXT: vle16.v v8, (a0)
3216 ; CHECK-NEXT: fmv.x.w a1, fa0
3217 ; CHECK-NEXT: vmv.v.x v9, a1
3218 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
3219 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
3220 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3221 ; CHECK-NEXT: vfdiv.vv v8, v12, v10
3222 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3223 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
3224 ; CHECK-NEXT: vse16.v v10, (a0)
3226 %a = load <8 x bfloat>, ptr %x
3227 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
3228 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
3229 %d = fdiv <8 x bfloat> %c, %a
3230 store <8 x bfloat> %d, ptr %x
; Non-power-of-two variant: divides a splat of the scalar %y by the
; <6 x bfloat> loaded from %x (scalar / vector) and stores the quotient back
; to %x.
; Expected assembly runs with VL = 6 throughout: vmv.v.x splat, widening with
; vfwcvtbf16.f.f.v, vfdiv.vv at e32/m2 with the widened splat (v12) as the
; dividend, then narrowing with vfncvtbf16.f.f.w.
3234 define void @fdiv_fv_v6bf16(ptr %x, bfloat %y) {
3235 ; CHECK-LABEL: fdiv_fv_v6bf16:
3237 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3238 ; CHECK-NEXT: vle16.v v8, (a0)
3239 ; CHECK-NEXT: fmv.x.w a1, fa0
3240 ; CHECK-NEXT: vmv.v.x v9, a1
3241 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
3242 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
3243 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3244 ; CHECK-NEXT: vfdiv.vv v8, v12, v10
3245 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3246 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
3247 ; CHECK-NEXT: vse16.v v10, (a0)
3249 %a = load <6 x bfloat>, ptr %x
3250 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
3251 %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
3252 %d = fdiv <6 x bfloat> %c, %a
3253 store <6 x bfloat> %d, ptr %x
; Divides a splat of the scalar %y by the <8 x half> loaded from %x
; (scalar / vector) and stores the quotient back to %x.
; Two expected outputs are recorded:
;  - with +zvfh, a single reverse divide vfrdiv.vf taking fa0;
;  - with only +zvfhmin, splat via vmv.v.x, widening of both operands with
;    vfwcvt.f.f.v, vfdiv.vv at e32/m2 with the widened splat (v12) as the
;    dividend, then narrowing with vfncvt.f.f.w.
3257 define void @fdiv_fv_v8f16(ptr %x, half %y) {
3258 ; ZVFH-LABEL: fdiv_fv_v8f16:
3260 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3261 ; ZVFH-NEXT: vle16.v v8, (a0)
3262 ; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
3263 ; ZVFH-NEXT: vse16.v v8, (a0)
3266 ; ZVFHMIN-LABEL: fdiv_fv_v8f16:
3268 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3269 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3270 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
3271 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
3272 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
3273 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
3274 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3275 ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
3276 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3277 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
3278 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3280 %a = load <8 x half>, ptr %x
3281 %b = insertelement <8 x half> poison, half %y, i32 0
3282 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
3283 %d = fdiv <8 x half> %c, %a
3284 store <8 x half> %d, ptr %x
; Non-power-of-two variant: divides a splat of the scalar %y by the <6 x half>
; loaded from %x (scalar / vector) and stores the quotient back to %x.
; Two expected outputs are recorded, both with VL = 6:
;  - with +zvfh, a single reverse divide vfrdiv.vf taking fa0;
;  - with only +zvfhmin, splat via vmv.v.x, widening with vfwcvt.f.f.v,
;    vfdiv.vv at e32/m2 with the widened splat (v12) as the dividend, then
;    narrowing with vfncvt.f.f.w.
3288 define void @fdiv_fv_v6f16(ptr %x, half %y) {
3289 ; ZVFH-LABEL: fdiv_fv_v6f16:
3291 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3292 ; ZVFH-NEXT: vle16.v v8, (a0)
3293 ; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
3294 ; ZVFH-NEXT: vse16.v v8, (a0)
3297 ; ZVFHMIN-LABEL: fdiv_fv_v6f16:
3299 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3300 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3301 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
3302 ; ZVFHMIN-NEXT: vmv.v.x v9, a1
3303 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
3304 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
3305 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3306 ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
3307 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3308 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
3309 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3311 %a = load <6 x half>, ptr %x
3312 %b = insertelement <6 x half> poison, half %y, i32 0
3313 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
3314 %d = fdiv <6 x half> %c, %a
3315 store <6 x half> %d, ptr %x
; Divides a splat of the scalar %y by the <4 x float> loaded from %x
; (scalar / vector) and stores the quotient back to %x. The expected assembly
; is a single reverse divide vfrdiv.vf taking fa0.
3319 define void @fdiv_fv_v4f32(ptr %x, float %y) {
3320 ; CHECK-LABEL: fdiv_fv_v4f32:
3322 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3323 ; CHECK-NEXT: vle32.v v8, (a0)
3324 ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
3325 ; CHECK-NEXT: vse32.v v8, (a0)
3327 %a = load <4 x float>, ptr %x
3328 %b = insertelement <4 x float> poison, float %y, i32 0
3329 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
3330 %d = fdiv <4 x float> %c, %a
3331 store <4 x float> %d, ptr %x
; Divides a splat of the scalar %y by the <2 x double> loaded from %x
; (scalar / vector) and stores the quotient back to %x. The expected assembly
; is a single reverse divide vfrdiv.vf at e64 taking fa0.
3335 define void @fdiv_fv_v2f64(ptr %x, double %y) {
3336 ; CHECK-LABEL: fdiv_fv_v2f64:
3338 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3339 ; CHECK-NEXT: vle64.v v8, (a0)
3340 ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
3341 ; CHECK-NEXT: vse64.v v8, (a0)
3343 %a = load <2 x double>, ptr %x
3344 %b = insertelement <2 x double> poison, double %y, i32 0
3345 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
3346 %d = fdiv <2 x double> %c, %a
3347 store <2 x double> %d, ptr %x
; Computes llvm.fma(%a, splat(%z), %b) where %a is the <8 x bfloat> loaded from
; %x and %b the one loaded from %y; the result is stored back to %x. The splat
; is the second multiplicand.
; Expected assembly (shared by all run lines): the scalar is splatted with
; vmv.v.x, all three operands are widened with vfwcvtbf16.f.f.v, combined by a
; vfmadd.vv at e32/m2, and narrowed back with vfncvtbf16.f.f.w.
3351 define void @fma_vf_v8bf16(ptr %x, ptr %y, bfloat %z) {
3352 ; CHECK-LABEL: fma_vf_v8bf16:
3354 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3355 ; CHECK-NEXT: vle16.v v8, (a1)
3356 ; CHECK-NEXT: vle16.v v9, (a0)
3357 ; CHECK-NEXT: fmv.x.w a1, fa0
3358 ; CHECK-NEXT: vmv.v.x v10, a1
3359 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
3360 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
3361 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
3362 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3363 ; CHECK-NEXT: vfmadd.vv v8, v14, v12
3364 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3365 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
3366 ; CHECK-NEXT: vse16.v v10, (a0)
3368 %a = load <8 x bfloat>, ptr %x
3369 %b = load <8 x bfloat>, ptr %y
3370 %c = insertelement <8 x bfloat> poison, bfloat %z, i32 0
3371 %d = shufflevector <8 x bfloat> %c, <8 x bfloat> poison, <8 x i32> zeroinitializer
3372 %e = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %d, <8 x bfloat> %b)
3373 store <8 x bfloat> %e, ptr %x
; Non-power-of-two variant: computes llvm.fma(%a, splat(%z), %b) on
; <6 x bfloat>, with %a loaded from %x and %b from %y; the result is stored
; back to %x.
; Expected assembly: the loads use VL = 6, then a vsetivli switches to VL = 8
; for the vmv.v.x splat, the vfwcvtbf16.f.f.v widenings and the vfmadd.vv at
; e32/m2; a final vsetivli returns to VL = 6 for the narrowing conversion and
; the store.
3377 define void @fma_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
3378 ; CHECK-LABEL: fma_vf_v6bf16:
3380 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3381 ; CHECK-NEXT: vle16.v v8, (a1)
3382 ; CHECK-NEXT: vle16.v v9, (a0)
3383 ; CHECK-NEXT: fmv.x.w a1, fa0
3384 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3385 ; CHECK-NEXT: vmv.v.x v10, a1
3386 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
3387 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
3388 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
3389 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3390 ; CHECK-NEXT: vfmadd.vv v8, v14, v12
3391 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3392 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
3393 ; CHECK-NEXT: vse16.v v10, (a0)
3395 %a = load <6 x bfloat>, ptr %x
3396 %b = load <6 x bfloat>, ptr %y
3397 %c = insertelement <6 x bfloat> poison, bfloat %z, i32 0
3398 %d = shufflevector <6 x bfloat> %c, <6 x bfloat> poison, <6 x i32> zeroinitializer
3399 %e = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %d, <6 x bfloat> %b)
3400 store <6 x bfloat> %e, ptr %x
; Computes llvm.fma(%a, splat(%z), %b) where %a is the <8 x half> loaded from
; %x and %b the one loaded from %y; the result is stored back to %x.
; Two expected outputs are recorded:
;  - with +zvfh, a single vfmacc.vf accumulating into the register holding %b;
;  - with only +zvfhmin, splat via vmv.v.x, widening of all three operands
;    with vfwcvt.f.f.v, vfmadd.vv at e32/m2, then narrowing with vfncvt.f.f.w.
3404 define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
3405 ; ZVFH-LABEL: fma_vf_v8f16:
3407 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3408 ; ZVFH-NEXT: vle16.v v8, (a0)
3409 ; ZVFH-NEXT: vle16.v v9, (a1)
3410 ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
3411 ; ZVFH-NEXT: vse16.v v9, (a0)
3414 ; ZVFHMIN-LABEL: fma_vf_v8f16:
3416 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3417 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
3418 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
3419 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
3420 ; ZVFHMIN-NEXT: vmv.v.x v10, a1
3421 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
3422 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
3423 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3424 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3425 ; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
3426 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3427 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
3428 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3430 %a = load <8 x half>, ptr %x
3431 %b = load <8 x half>, ptr %y
3432 %c = insertelement <8 x half> poison, half %z, i32 0
3433 %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
3434 %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %b)
3435 store <8 x half> %e, ptr %x
; Non-power-of-two variant: computes llvm.fma(%a, splat(%z), %b) on
; <6 x half>, with %a loaded from %x and %b from %y; the result is stored back
; to %x.
; Two expected outputs are recorded:
;  - with +zvfh, a single vfmacc.vf at VL = 6;
;  - with only +zvfhmin, loads at VL = 6, a switch to VL = 8 for the vmv.v.x
;    splat, the vfwcvt.f.f.v widenings and the vfmadd.vv at e32/m2, then a
;    switch back to VL = 6 for the vfncvt.f.f.w narrowing and the store.
3439 define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
3440 ; ZVFH-LABEL: fma_vf_v6f16:
3442 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3443 ; ZVFH-NEXT: vle16.v v8, (a0)
3444 ; ZVFH-NEXT: vle16.v v9, (a1)
3445 ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
3446 ; ZVFH-NEXT: vse16.v v9, (a0)
3449 ; ZVFHMIN-LABEL: fma_vf_v6f16:
3451 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3452 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
3453 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
3454 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
3455 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3456 ; ZVFHMIN-NEXT: vmv.v.x v10, a1
3457 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
3458 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
3459 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3460 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3461 ; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
3462 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3463 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
3464 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3466 %a = load <6 x half>, ptr %x
3467 %b = load <6 x half>, ptr %y
3468 %c = insertelement <6 x half> poison, half %z, i32 0
3469 %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
3470 %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %b)
3471 store <6 x half> %e, ptr %x
; Computes llvm.fma(%a, splat(%z), %b) where %a is the <4 x float> loaded from
; %x and %b the one loaded from %y; the result is stored back to %x. The
; expected assembly is a single vfmacc.vf accumulating into the register that
; holds %b.
3475 define void @fma_vf_v4f32(ptr %x, ptr %y, float %z) {
3476 ; CHECK-LABEL: fma_vf_v4f32:
3478 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3479 ; CHECK-NEXT: vle32.v v8, (a0)
3480 ; CHECK-NEXT: vle32.v v9, (a1)
3481 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
3482 ; CHECK-NEXT: vse32.v v9, (a0)
3484 %a = load <4 x float>, ptr %x
3485 %b = load <4 x float>, ptr %y
3486 %c = insertelement <4 x float> poison, float %z, i32 0
3487 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
3488 %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %d, <4 x float> %b)
3489 store <4 x float> %e, ptr %x
; Computes llvm.fma(%a, splat(%z), %b) where %a is the <2 x double> loaded from
; %x and %b the one loaded from %y; the result is stored back to %x. The
; expected assembly is a single vfmacc.vf at e64 accumulating into the
; register that holds %b.
3493 define void @fma_vf_v2f64(ptr %x, ptr %y, double %z) {
3494 ; CHECK-LABEL: fma_vf_v2f64:
3496 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3497 ; CHECK-NEXT: vle64.v v8, (a0)
3498 ; CHECK-NEXT: vle64.v v9, (a1)
3499 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
3500 ; CHECK-NEXT: vse64.v v9, (a0)
3502 %a = load <2 x double>, ptr %x
3503 %b = load <2 x double>, ptr %y
3504 %c = insertelement <2 x double> poison, double %z, i32 0
3505 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
3506 %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %d, <2 x double> %b)
3507 store <2 x double> %e, ptr %x
; Computes llvm.fma(splat(%z), %a, %b) where %a is the <8 x bfloat> loaded from
; %x and %b the one loaded from %y; the result is stored back to %x. Here the
; splat is the first multiplicand.
; Expected assembly (shared by all run lines): the scalar is splatted with
; vmv.v.x, all three operands are widened with vfwcvtbf16.f.f.v, combined by a
; vfmadd.vv at e32/m2, and narrowed back with vfncvtbf16.f.f.w.
3511 define void @fma_fv_v8bf16(ptr %x, ptr %y, bfloat %z) {
3512 ; CHECK-LABEL: fma_fv_v8bf16:
3514 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3515 ; CHECK-NEXT: vle16.v v8, (a1)
3516 ; CHECK-NEXT: vle16.v v9, (a0)
3517 ; CHECK-NEXT: fmv.x.w a1, fa0
3518 ; CHECK-NEXT: vmv.v.x v10, a1
3519 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
3520 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
3521 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
3522 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3523 ; CHECK-NEXT: vfmadd.vv v8, v14, v12
3524 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3525 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
3526 ; CHECK-NEXT: vse16.v v10, (a0)
3528 %a = load <8 x bfloat>, ptr %x
3529 %b = load <8 x bfloat>, ptr %y
3530 %c = insertelement <8 x bfloat> poison, bfloat %z, i32 0
3531 %d = shufflevector <8 x bfloat> %c, <8 x bfloat> poison, <8 x i32> zeroinitializer
3532 %e = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %d, <8 x bfloat> %a, <8 x bfloat> %b)
3533 store <8 x bfloat> %e, ptr %x
; Non-power-of-two variant: computes llvm.fma(splat(%z), %a, %b) on
; <6 x bfloat>, with %a loaded from %x and %b from %y; the result is stored
; back to %x. The splat is the first multiplicand.
; Expected assembly: the loads use VL = 6, then a vsetivli switches to VL = 8
; for the vmv.v.x splat, the vfwcvtbf16.f.f.v widenings and the vfmadd.vv at
; e32/m2; a final vsetivli returns to VL = 6 for the narrowing conversion and
; the store.
3537 define void @fma_fv_v6bf16(ptr %x, ptr %y, bfloat %z) {
3538 ; CHECK-LABEL: fma_fv_v6bf16:
3540 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3541 ; CHECK-NEXT: vle16.v v8, (a1)
3542 ; CHECK-NEXT: vle16.v v9, (a0)
3543 ; CHECK-NEXT: fmv.x.w a1, fa0
3544 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3545 ; CHECK-NEXT: vmv.v.x v10, a1
3546 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
3547 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
3548 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
3549 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3550 ; CHECK-NEXT: vfmadd.vv v8, v14, v12
3551 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3552 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
3553 ; CHECK-NEXT: vse16.v v10, (a0)
3555 %a = load <6 x bfloat>, ptr %x
3556 %b = load <6 x bfloat>, ptr %y
3557 %c = insertelement <6 x bfloat> poison, bfloat %z, i32 0
3558 %d = shufflevector <6 x bfloat> %c, <6 x bfloat> poison, <6 x i32> zeroinitializer
3559 %e = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %d, <6 x bfloat> %a, <6 x bfloat> %b)
3560 store <6 x bfloat> %e, ptr %x
; Computes llvm.fma(splat(%z), %a, %b) where %a is the <8 x half> loaded from
; %x and %b the one loaded from %y; the result is stored back to %x. The splat
; is the first multiplicand.
; Two expected outputs are recorded:
;  - with +zvfh, a single vfmacc.vf accumulating into the register holding %b;
;  - with only +zvfhmin, splat via vmv.v.x, widening of all three operands
;    with vfwcvt.f.f.v, vfmadd.vv at e32/m2, then narrowing with vfncvt.f.f.w.
3564 define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
3565 ; ZVFH-LABEL: fma_fv_v8f16:
3567 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3568 ; ZVFH-NEXT: vle16.v v8, (a0)
3569 ; ZVFH-NEXT: vle16.v v9, (a1)
3570 ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
3571 ; ZVFH-NEXT: vse16.v v9, (a0)
3574 ; ZVFHMIN-LABEL: fma_fv_v8f16:
3576 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3577 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
3578 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
3579 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
3580 ; ZVFHMIN-NEXT: vmv.v.x v10, a1
3581 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
3582 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
3583 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3584 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3585 ; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
3586 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3587 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
3588 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3590 %a = load <8 x half>, ptr %x
3591 %b = load <8 x half>, ptr %y
3592 %c = insertelement <8 x half> poison, half %z, i32 0
3593 %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
3594 %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %d, <8 x half> %a, <8 x half> %b)
3595 store <8 x half> %e, ptr %x
; Non-power-of-two variant: computes llvm.fma(splat(%z), %a, %b) on
; <6 x half>, with %a loaded from %x and %b from %y; the result is stored back
; to %x. The splat is the first multiplicand.
; Two expected outputs are recorded:
;  - with +zvfh, a single vfmacc.vf at VL = 6;
;  - with only +zvfhmin, loads at VL = 6, a switch to VL = 8 for the vmv.v.x
;    splat, the vfwcvt.f.f.v widenings and the vfmadd.vv at e32/m2, then a
;    switch back to VL = 6 for the vfncvt.f.f.w narrowing and the store.
3599 define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
3600 ; ZVFH-LABEL: fma_fv_v6f16:
3602 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3603 ; ZVFH-NEXT: vle16.v v8, (a0)
3604 ; ZVFH-NEXT: vle16.v v9, (a1)
3605 ; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
3606 ; ZVFH-NEXT: vse16.v v9, (a0)
3609 ; ZVFHMIN-LABEL: fma_fv_v6f16:
3611 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3612 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
3613 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
3614 ; ZVFHMIN-NEXT: fmv.x.w a1, fa0
3615 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3616 ; ZVFHMIN-NEXT: vmv.v.x v10, a1
3617 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
3618 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
3619 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3620 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3621 ; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
3622 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3623 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
3624 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3626 %a = load <6 x half>, ptr %x
3627 %b = load <6 x half>, ptr %y
3628 %c = insertelement <6 x half> poison, half %z, i32 0
3629 %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
3630 %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %d, <6 x half> %a, <6 x half> %b)
3631 store <6 x half> %e, ptr %x
; Computes llvm.fma(splat(%z), %a, %b) where %a is the <4 x float> loaded from
; %x and %b the one loaded from %y; the result is stored back to %x. The splat
; is the first multiplicand. The expected assembly is a single vfmacc.vf
; accumulating into the register that holds %b.
3635 define void @fma_fv_v4f32(ptr %x, ptr %y, float %z) {
3636 ; CHECK-LABEL: fma_fv_v4f32:
3638 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3639 ; CHECK-NEXT: vle32.v v8, (a0)
3640 ; CHECK-NEXT: vle32.v v9, (a1)
3641 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
3642 ; CHECK-NEXT: vse32.v v9, (a0)
3644 %a = load <4 x float>, ptr %x
3645 %b = load <4 x float>, ptr %y
3646 %c = insertelement <4 x float> poison, float %z, i32 0
3647 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
3648 %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %d, <4 x float> %a, <4 x float> %b)
3649 store <4 x float> %e, ptr %x
; Computes llvm.fma(splat(%z), %a, %b) where %a is the <2 x double> loaded from
; %x and %b the one loaded from %y; the result is stored back to %x. The splat
; is the first multiplicand. The expected assembly is a single vfmacc.vf at
; e64 accumulating into the register that holds %b.
3653 define void @fma_fv_v2f64(ptr %x, ptr %y, double %z) {
3654 ; CHECK-LABEL: fma_fv_v2f64:
3656 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3657 ; CHECK-NEXT: vle64.v v8, (a0)
3658 ; CHECK-NEXT: vle64.v v9, (a1)
3659 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8
3660 ; CHECK-NEXT: vse64.v v9, (a0)
3662 %a = load <2 x double>, ptr %x
3663 %b = load <2 x double>, ptr %y
3664 %c = insertelement <2 x double> poison, double %z, i32 0
3665 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
3666 %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %d, <2 x double> %a, <2 x double> %b)
3667 store <2 x double> %e, ptr %x
; Computes llvm.fma(%a, splat(%z), fneg(%b)) where %a is the <8 x bfloat>
; loaded from %x and %b the one loaded from %y; the result is stored back
; to %x.
; Expected assembly (shared by all run lines): the negation of %b is done in
; the integer domain by vxor.vx with the value from `lui a1, 8` (0x8000, the
; top bit of each 16-bit element); the scalar is splatted with vmv.v.x, all
; three operands are widened with vfwcvtbf16.f.f.v, combined by a vfmadd.vv
; at e32/m2, and narrowed back with vfncvtbf16.f.f.w.
3671 define void @fmsub_vf_v8bf16(ptr %x, ptr %y, bfloat %z) {
3672 ; CHECK-LABEL: fmsub_vf_v8bf16:
3674 ; CHECK-NEXT: fmv.x.w a2, fa0
3675 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3676 ; CHECK-NEXT: vle16.v v8, (a1)
3677 ; CHECK-NEXT: vle16.v v9, (a0)
3678 ; CHECK-NEXT: lui a1, 8
3679 ; CHECK-NEXT: vmv.v.x v10, a2
3680 ; CHECK-NEXT: vxor.vx v8, v8, a1
3681 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
3682 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
3683 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
3684 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3685 ; CHECK-NEXT: vfmadd.vv v8, v12, v14
3686 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3687 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
3688 ; CHECK-NEXT: vse16.v v10, (a0)
3690 %a = load <8 x bfloat>, ptr %x
3691 %b = load <8 x bfloat>, ptr %y
3692 %c = insertelement <8 x bfloat> poison, bfloat %z, i32 0
3693 %d = shufflevector <8 x bfloat> %c, <8 x bfloat> poison, <8 x i32> zeroinitializer
3694 %neg = fneg <8 x bfloat> %b
3695 %e = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %d, <8 x bfloat> %neg)
3696 store <8 x bfloat> %e, ptr %x
; Non-power-of-two variant: computes llvm.fma(%a, splat(%z), fneg(%b)) on
; <6 x bfloat>, with %a loaded from %x and %b from %y; the result is stored
; back to %x.
; Expected assembly: loads at VL = 6, then a switch to VL = 8 for the vmv.v.x
; splat, the sign flip of %b (vxor.vx with 0x8000 from `lui a1, 8`), the
; vfwcvtbf16.f.f.v widenings and the vfmadd.vv at e32/m2; a final vsetivli
; returns to VL = 6 for the narrowing conversion and the store.
3700 define void @fmsub_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
3701 ; CHECK-LABEL: fmsub_vf_v6bf16:
3703 ; CHECK-NEXT: fmv.x.w a2, fa0
3704 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3705 ; CHECK-NEXT: vle16.v v8, (a1)
3706 ; CHECK-NEXT: vle16.v v9, (a0)
3707 ; CHECK-NEXT: lui a1, 8
3708 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3709 ; CHECK-NEXT: vmv.v.x v10, a2
3710 ; CHECK-NEXT: vxor.vx v8, v8, a1
3711 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
3712 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8
3713 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
3714 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3715 ; CHECK-NEXT: vfmadd.vv v8, v12, v14
3716 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3717 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
3718 ; CHECK-NEXT: vse16.v v10, (a0)
3720 %a = load <6 x bfloat>, ptr %x
3721 %b = load <6 x bfloat>, ptr %y
3722 %c = insertelement <6 x bfloat> poison, bfloat %z, i32 0
3723 %d = shufflevector <6 x bfloat> %c, <6 x bfloat> poison, <6 x i32> zeroinitializer
3724 %neg = fneg <6 x bfloat> %b
3725 %e = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %d, <6 x bfloat> %neg)
3726 store <6 x bfloat> %e, ptr %x
; Computes llvm.fma(%a, splat(%z), fneg(%b)) where %a is the <8 x half> loaded
; from %x and %b the one loaded from %y; the result is stored back to %x.
; Two expected outputs are recorded:
;  - with +zvfh, a single vfmsac.vf on the register holding %b;
;  - with only +zvfhmin, %b is sign-flipped by vxor.vx with 0x8000 (from
;    `lui a1, 8`), the scalar is splatted via vmv.v.x, all three operands are
;    widened with vfwcvt.f.f.v, combined by vfmadd.vv at e32/m2 and narrowed
;    back with vfncvt.f.f.w.
3730 define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
3731 ; ZVFH-LABEL: fmsub_vf_v8f16:
3733 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3734 ; ZVFH-NEXT: vle16.v v8, (a0)
3735 ; ZVFH-NEXT: vle16.v v9, (a1)
3736 ; ZVFH-NEXT: vfmsac.vf v9, fa0, v8
3737 ; ZVFH-NEXT: vse16.v v9, (a0)
3740 ; ZVFHMIN-LABEL: fmsub_vf_v8f16:
3742 ; ZVFHMIN-NEXT: fmv.x.w a2, fa0
3743 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3744 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
3745 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
3746 ; ZVFHMIN-NEXT: lui a1, 8
3747 ; ZVFHMIN-NEXT: vmv.v.x v10, a2
3748 ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
3749 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
3750 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
3751 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3752 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3753 ; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14
3754 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3755 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
3756 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3758 %a = load <8 x half>, ptr %x
3759 %b = load <8 x half>, ptr %y
3760 %c = insertelement <8 x half> poison, half %z, i32 0
3761 %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
3762 %neg = fneg <8 x half> %b
3763 %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %neg)
3764 store <8 x half> %e, ptr %x
; Non-power-of-two variant: computes llvm.fma(%a, splat(%z), fneg(%b)) on
; <6 x half>, with %a loaded from %x and %b from %y; the result is stored back
; to %x.
; Two expected outputs are recorded:
;  - with +zvfh, a single vfmsac.vf at VL = 6;
;  - with only +zvfhmin, loads at VL = 6, a switch to VL = 8 for the vmv.v.x
;    splat, the sign flip of %b (vxor.vx with 0x8000 from `lui a1, 8`), the
;    vfwcvt.f.f.v widenings and the vfmadd.vv at e32/m2, then a switch back to
;    VL = 6 for the vfncvt.f.f.w narrowing and the store.
3768 define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
3769 ; ZVFH-LABEL: fmsub_vf_v6f16:
3771 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3772 ; ZVFH-NEXT: vle16.v v8, (a0)
3773 ; ZVFH-NEXT: vle16.v v9, (a1)
3774 ; ZVFH-NEXT: vfmsac.vf v9, fa0, v8
3775 ; ZVFH-NEXT: vse16.v v9, (a0)
3778 ; ZVFHMIN-LABEL: fmsub_vf_v6f16:
3780 ; ZVFHMIN-NEXT: fmv.x.w a2, fa0
3781 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3782 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
3783 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
3784 ; ZVFHMIN-NEXT: lui a1, 8
3785 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3786 ; ZVFHMIN-NEXT: vmv.v.x v10, a2
3787 ; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
3788 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
3789 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8
3790 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
3791 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3792 ; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14
3793 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3794 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
3795 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
3797 %a = load <6 x half>, ptr %x
3798 %b = load <6 x half>, ptr %y
3799 %c = insertelement <6 x half> poison, half %z, i32 0
3800 %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
3801 %neg = fneg <6 x half> %b
3802 %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %neg)
3803 store <6 x half> %e, ptr %x
; Computes llvm.fma(fneg(%a), splat(%z), %b) where %a is the <4 x float> loaded
; from %x and %b the one loaded from %y; the result is stored back to %x. Only
; the vector multiplicand is negated. The expected assembly is a single
; vfnmsac.vf on the register that holds %b.
3807 define void @fnmsub_vf_v4f32(ptr %x, ptr %y, float %z) {
3808 ; CHECK-LABEL: fnmsub_vf_v4f32:
3810 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3811 ; CHECK-NEXT: vle32.v v8, (a0)
3812 ; CHECK-NEXT: vle32.v v9, (a1)
3813 ; CHECK-NEXT: vfnmsac.vf v9, fa0, v8
3814 ; CHECK-NEXT: vse32.v v9, (a0)
3816 %a = load <4 x float>, ptr %x
3817 %b = load <4 x float>, ptr %y
3818 %c = insertelement <4 x float> poison, float %z, i32 0
3819 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
3820 %neg = fneg <4 x float> %a
3821 %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %d, <4 x float> %b)
3822 store <4 x float> %e, ptr %x
; Computes llvm.fma(fneg(%a), splat(%z), fneg(%b)) where %a is the
; <2 x double> loaded from %x and %b the one loaded from %y; the result is
; stored back to %x. Both the vector multiplicand and the addend are negated.
; The expected assembly is a single vfnmacc.vf on the register that holds %b.
3826 define void @fnmadd_vf_v2f64(ptr %x, ptr %y, double %z) {
3827 ; CHECK-LABEL: fnmadd_vf_v2f64:
3829 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3830 ; CHECK-NEXT: vle64.v v8, (a0)
3831 ; CHECK-NEXT: vle64.v v9, (a1)
3832 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8
3833 ; CHECK-NEXT: vse64.v v9, (a0)
3835 %a = load <2 x double>, ptr %x
3836 %b = load <2 x double>, ptr %y
3837 %c = insertelement <2 x double> poison, double %z, i32 0
3838 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
3839 %neg = fneg <2 x double> %a
3840 %neg2 = fneg <2 x double> %b
3841 %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %d, <2 x double> %neg2)
3842 store <2 x double> %e, ptr %x
; Computes llvm.fma(fneg(splat(%z)), %a, %b) where %a is the <4 x float> loaded
; from %x and %b the one loaded from %y; the result is stored back to %x. Here
; the negation is applied to the splat, which is the first multiplicand. The
; expected assembly is a single vfnmsac.vf on the register that holds %b.
3846 define void @fnmsub_fv_v4f32(ptr %x, ptr %y, float %z) {
3847 ; CHECK-LABEL: fnmsub_fv_v4f32:
3849 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
3850 ; CHECK-NEXT: vle32.v v8, (a0)
3851 ; CHECK-NEXT: vle32.v v9, (a1)
3852 ; CHECK-NEXT: vfnmsac.vf v9, fa0, v8
3853 ; CHECK-NEXT: vse32.v v9, (a0)
3855 %a = load <4 x float>, ptr %x
3856 %b = load <4 x float>, ptr %y
3857 %c = insertelement <4 x float> poison, float %z, i32 0
3858 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
3859 %neg = fneg <4 x float> %d
3860 %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %a, <4 x float> %b)
3861 store <4 x float> %e, ptr %x
; Computes llvm.fma(fneg(splat(%z)), %a, fneg(%b)) where %a is the
; <2 x double> loaded from %x and %b the one loaded from %y; the result is
; stored back to %x. The splat (first multiplicand) and the addend are both
; negated. The expected assembly is a single vfnmacc.vf on the register that
; holds %b.
3865 define void @fnmadd_fv_v2f64(ptr %x, ptr %y, double %z) {
3866 ; CHECK-LABEL: fnmadd_fv_v2f64:
3868 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
3869 ; CHECK-NEXT: vle64.v v8, (a0)
3870 ; CHECK-NEXT: vle64.v v9, (a1)
3871 ; CHECK-NEXT: vfnmacc.vf v9, fa0, v8
3872 ; CHECK-NEXT: vse64.v v9, (a0)
3874 %a = load <2 x double>, ptr %x
3875 %b = load <2 x double>, ptr %y
3876 %c = insertelement <2 x double> poison, double %z, i32 0
3877 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
3878 %neg = fneg <2 x double> %d
3879 %neg2 = fneg <2 x double> %b
3880 %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %a, <2 x double> %neg2)
3881 store <2 x double> %e, ptr %x
; Applies llvm.trunc to the <8 x bfloat> loaded from %x and stores the result
; back to %x.
; Expected assembly (shared by all run lines): the input is widened with
; vfwcvtbf16.f.f.v and processed at e32/m2. A mask is built by comparing the
; absolute value (vfabs.v) against a scalar threshold with vmflt.vf; the
; threshold comes from `lui a1, 307200`, i.e. bit pattern 0x4B000000, which is
; 2^23 as an f32. Masked lanes are converted to integer with round-toward-zero
; (vfcvt.rtz.x.f.v) and back (vfcvt.f.x.v), the original sign is re-applied
; with a masked vfsgnj.vv under the `mu` policy, and the result is narrowed
; with vfncvtbf16.f.f.w.
3885 define void @trunc_v8bf16(ptr %x) {
3886 ; CHECK-LABEL: trunc_v8bf16:
3888 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3889 ; CHECK-NEXT: vle16.v v8, (a0)
3890 ; CHECK-NEXT: lui a1, 307200
3891 ; CHECK-NEXT: fmv.w.x fa5, a1
3892 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
3893 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3894 ; CHECK-NEXT: vfabs.v v8, v10
3895 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
3896 ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
3897 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
3898 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
3899 ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
3900 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3901 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
3902 ; CHECK-NEXT: vse16.v v8, (a0)
3904 %a = load <8 x bfloat>, ptr %x
3905 %b = call <8 x bfloat> @llvm.trunc.v8bf16(<8 x bfloat> %a)
3906 store <8 x bfloat> %b, ptr %x
; Non-power-of-two variant: applies llvm.trunc to the <6 x bfloat> loaded from
; %x and stores the result back to %x.
; Expected assembly: the load uses VL = 6; a vsetivli then switches to VL = 8
; for the vfwcvtbf16.f.f.v widening, the vfabs.v and the vmflt.vf threshold
; compare (threshold from `lui a1, 307200`, bit pattern 0x4B000000 = 2^23 as
; an f32). VL is set back to 6 for the masked round-toward-zero conversion
; pair, the masked vfsgnj.vv sign restore, the vfncvtbf16.f.f.w narrowing and
; the store.
3910 define void @trunc_v6bf16(ptr %x) {
3911 ; CHECK-LABEL: trunc_v6bf16:
3913 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3914 ; CHECK-NEXT: vle16.v v8, (a0)
3915 ; CHECK-NEXT: lui a1, 307200
3916 ; CHECK-NEXT: fmv.w.x fa5, a1
3917 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3918 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
3919 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3920 ; CHECK-NEXT: vfabs.v v8, v10
3921 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
3922 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
3923 ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
3924 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
3925 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
3926 ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
3927 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3928 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
3929 ; CHECK-NEXT: vse16.v v8, (a0)
3931 %a = load <6 x bfloat>, ptr %x
3932 %b = call <6 x bfloat> @llvm.trunc.v6bf16(<6 x bfloat> %a)
3933 store <6 x bfloat> %b, ptr %x
; Applies llvm.trunc to the <8 x half> loaded from %x and stores the result
; back to %x.
; Two expected outputs are recorded:
;  - with +zvfh, everything stays at e16: the threshold is loaded with flh
;    from a constant-pool entry (its value is not visible in this file), a mask
;    is formed from vfabs.v + vmflt.vf, masked lanes go through
;    vfcvt.rtz.x.f.v / vfcvt.f.x.v, and a masked vfsgnj.vv restores the sign;
;  - with only +zvfhmin, the input is first widened with vfwcvt.f.f.v, the
;    same sequence runs at e32/m2 with the threshold from `lui a1, 307200`
;    (bit pattern 0x4B000000 = 2^23 as an f32), and the result is narrowed
;    with vfncvt.f.f.w.
3937 define void @trunc_v8f16(ptr %x) {
3938 ; ZVFH-LABEL: trunc_v8f16:
3940 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3941 ; ZVFH-NEXT: vle16.v v8, (a0)
3942 ; ZVFH-NEXT: lui a1, %hi(.LCPI171_0)
3943 ; ZVFH-NEXT: flh fa5, %lo(.LCPI171_0)(a1)
3944 ; ZVFH-NEXT: vfabs.v v9, v8
3945 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
3946 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
3947 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
3948 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
3949 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
3950 ; ZVFH-NEXT: vse16.v v8, (a0)
3953 ; ZVFHMIN-LABEL: trunc_v8f16:
3955 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3956 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3957 ; ZVFHMIN-NEXT: lui a1, 307200
3958 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
3959 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
3960 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
3961 ; ZVFHMIN-NEXT: vfabs.v v8, v10
3962 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
3963 ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
3964 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
3965 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
3966 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
3967 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
3968 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
3969 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
3971 %a = load <8 x half>, ptr %x
3972 %b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a)
3973 store <8 x half> %b, ptr %x
; Non-power-of-two variant: applies llvm.trunc to the <6 x half> loaded from
; %x and stores the result back to %x.
; Two expected outputs are recorded; in both, the load uses VL = 6, the mask
; computation (vfabs.v + vmflt.vf) runs at VL = 8, and VL returns to 6 for the
; masked conversions, the masked vfsgnj.vv and the store:
;  - with +zvfh, the sequence stays at e16 and the threshold is loaded with
;    flh from a constant-pool entry (its value is not visible in this file);
;  - with only +zvfhmin, the input is widened with vfwcvt.f.f.v, the sequence
;    runs at e32/m2 with the threshold from `lui a1, 307200` (bit pattern
;    0x4B000000 = 2^23 as an f32), and the result is narrowed with
;    vfncvt.f.f.w.
3977 define void @trunc_v6f16(ptr %x) {
3978 ; ZVFH-LABEL: trunc_v6f16:
3980 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3981 ; ZVFH-NEXT: vle16.v v8, (a0)
3982 ; ZVFH-NEXT: lui a1, %hi(.LCPI172_0)
3983 ; ZVFH-NEXT: flh fa5, %lo(.LCPI172_0)(a1)
3984 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
3985 ; ZVFH-NEXT: vfabs.v v9, v8
3986 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
3987 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3988 ; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
3989 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
3990 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
3991 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
3992 ; ZVFH-NEXT: vse16.v v8, (a0)
3995 ; ZVFHMIN-LABEL: trunc_v6f16:
3997 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
3998 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
3999 ; ZVFHMIN-NEXT: lui a1, 307200
4000 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4001 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4002 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4003 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4004 ; ZVFHMIN-NEXT: vfabs.v v8, v10
4005 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4006 ; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
4007 ; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t
4008 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4009 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4010 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4011 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4012 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4013 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
4015 %a = load <6 x half>, ptr %x
4016 %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
4017 store <6 x half> %b, ptr %x
; trunc_v4f32: llvm.trunc on <4 x float>, loaded from and stored back to %x.
; The threshold is materialised with lui 307200 (0x4B000000, the f32 bit
; pattern of 2^23). Lanes whose magnitude is below it are converted to integer
; with vfcvt.rtz.x.f.v and back; the masked, mask-undisturbed vfsgnj.vv then
; re-applies the input's sign and leaves the other lanes as loaded.
4021 define void @trunc_v4f32(ptr %x) {
4022 ; CHECK-LABEL: trunc_v4f32:
4024 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4025 ; CHECK-NEXT: vle32.v v8, (a0)
4026 ; CHECK-NEXT: lui a1, 307200
4027 ; CHECK-NEXT: fmv.w.x fa5, a1
4028 ; CHECK-NEXT: vfabs.v v9, v8
4029 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4030 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
4031 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4032 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
4033 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4034 ; CHECK-NEXT: vse32.v v8, (a0)
4036 %a = load <4 x float>, ptr %x
4037 %b = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
4038 store <4 x float> %b, ptr %x
; trunc_v2f64: llvm.trunc on <2 x double>, loaded from and stored back to %x.
; The magnitude threshold is loaded with fld from constant-pool entry
; .LCPI174_0 (its value is not visible in these assertions); the masked
; vfcvt.rtz.x.f.v / vfcvt.f.x.v pair rounds only the lanes below it.
4042 define void @trunc_v2f64(ptr %x) {
4043 ; CHECK-LABEL: trunc_v2f64:
4045 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4046 ; CHECK-NEXT: vle64.v v8, (a0)
4047 ; CHECK-NEXT: lui a1, %hi(.LCPI174_0)
4048 ; CHECK-NEXT: fld fa5, %lo(.LCPI174_0)(a1)
4049 ; CHECK-NEXT: vfabs.v v9, v8
4050 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4051 ; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
4052 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4053 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
4054 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4055 ; CHECK-NEXT: vse64.v v8, (a0)
4057 %a = load <2 x double>, ptr %x
4058 %b = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a)
4059 store <2 x double> %b, ptr %x
; ceil_v8bf16: llvm.ceil on <8 x bfloat>, loaded from and stored back to %x.
; The same sequence is expected for every RUN line: widen to f32 with
; vfwcvtbf16.f.f.v, round at e32, narrow with vfncvtbf16.f.f.w.
; fsrmi a1, 3 writes frm=3 (RUP, round up, per the RISC-V F spec) around the
; float->int conversion and fsrm a1 restores the previous rounding mode.
4063 define void @ceil_v8bf16(ptr %x) {
4064 ; CHECK-LABEL: ceil_v8bf16:
4066 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4067 ; CHECK-NEXT: vle16.v v8, (a0)
4068 ; CHECK-NEXT: lui a1, 307200
4069 ; CHECK-NEXT: fmv.w.x fa5, a1
4070 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4071 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4072 ; CHECK-NEXT: vfabs.v v8, v10
4073 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
4074 ; CHECK-NEXT: fsrmi a1, 3
4075 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4076 ; CHECK-NEXT: fsrm a1
4077 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4078 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4079 ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4080 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4081 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4082 ; CHECK-NEXT: vse16.v v8, (a0)
4084 %a = load <8 x bfloat>, ptr %x
4085 %b = call <8 x bfloat> @llvm.ceil.v8bf16(<8 x bfloat> %a)
4086 store <8 x bfloat> %b, ptr %x
; ceil_v6bf16: llvm.ceil on <6 x bfloat> (a non-power-of-two element count),
; loaded from and stored back to %x.
; The load uses VL=6, the widening and mask computation run at VL=8, and the
; masked conversions under frm=3 (fsrmi a1, 3) switch back to VL=6.
4090 define void @ceil_v6bf16(ptr %x) {
4091 ; CHECK-LABEL: ceil_v6bf16:
4093 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4094 ; CHECK-NEXT: vle16.v v8, (a0)
4095 ; CHECK-NEXT: lui a1, 307200
4096 ; CHECK-NEXT: fmv.w.x fa5, a1
4097 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4098 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4099 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4100 ; CHECK-NEXT: vfabs.v v8, v10
4101 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
4102 ; CHECK-NEXT: fsrmi a1, 3
4103 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
4104 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4105 ; CHECK-NEXT: fsrm a1
4106 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4107 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4108 ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4109 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4110 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4111 ; CHECK-NEXT: vse16.v v8, (a0)
4113 %a = load <6 x bfloat>, ptr %x
4114 %b = call <6 x bfloat> @llvm.ceil.v6bf16(<6 x bfloat> %a)
4115 store <6 x bfloat> %b, ptr %x
; ceil_v8f16: llvm.ceil on <8 x half>, loaded from and stored back to %x.
; With +zvfh the rounding runs directly at e16 against a threshold loaded from
; constant-pool entry .LCPI177_0; with +zvfhmin the vector is widened to f32
; (vfwcvt.f.f.v), rounded at e32 against the lui 307200 threshold, and
; narrowed with vfncvt.f.f.w. Both use fsrmi a1, 3 / fsrm a1 around the
; float->int conversion.
4119 define void @ceil_v8f16(ptr %x) {
4120 ; ZVFH-LABEL: ceil_v8f16:
4122 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4123 ; ZVFH-NEXT: vle16.v v8, (a0)
4124 ; ZVFH-NEXT: lui a1, %hi(.LCPI177_0)
4125 ; ZVFH-NEXT: flh fa5, %lo(.LCPI177_0)(a1)
4126 ; ZVFH-NEXT: vfabs.v v9, v8
4127 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4128 ; ZVFH-NEXT: fsrmi a1, 3
4129 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4130 ; ZVFH-NEXT: fsrm a1
4131 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4132 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4133 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4134 ; ZVFH-NEXT: vse16.v v8, (a0)
4137 ; ZVFHMIN-LABEL: ceil_v8f16:
4139 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4140 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4141 ; ZVFHMIN-NEXT: lui a1, 307200
4142 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4143 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4144 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4145 ; ZVFHMIN-NEXT: vfabs.v v8, v10
4146 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4147 ; ZVFHMIN-NEXT: fsrmi a1, 3
4148 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4149 ; ZVFHMIN-NEXT: fsrm a1
4150 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4151 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4152 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4153 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4154 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4155 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
4157 %a = load <8 x half>, ptr %x
4158 %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a)
4159 store <8 x half> %b, ptr %x
; ceil_v6f16: llvm.ceil on <6 x half>, loaded from and stored back to %x.
; Both the +zvfh (e16) and +zvfhmin (widen to e32) sequences load at VL=6,
; compute the |x| < threshold mask at VL=8, then drop back to VL=6 for the
; masked conversions performed under frm=3 (fsrmi a1, 3).
4163 define void @ceil_v6f16(ptr %x) {
4164 ; ZVFH-LABEL: ceil_v6f16:
4166 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4167 ; ZVFH-NEXT: vle16.v v8, (a0)
4168 ; ZVFH-NEXT: lui a1, %hi(.LCPI178_0)
4169 ; ZVFH-NEXT: flh fa5, %lo(.LCPI178_0)(a1)
4170 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4171 ; ZVFH-NEXT: vfabs.v v9, v8
4172 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4173 ; ZVFH-NEXT: fsrmi a1, 3
4174 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4175 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4176 ; ZVFH-NEXT: fsrm a1
4177 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4178 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4179 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4180 ; ZVFH-NEXT: vse16.v v8, (a0)
4183 ; ZVFHMIN-LABEL: ceil_v6f16:
4185 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4186 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4187 ; ZVFHMIN-NEXT: lui a1, 307200
4188 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4189 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4190 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4191 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4192 ; ZVFHMIN-NEXT: vfabs.v v8, v10
4193 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4194 ; ZVFHMIN-NEXT: fsrmi a1, 3
4195 ; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
4196 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4197 ; ZVFHMIN-NEXT: fsrm a1
4198 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4199 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4200 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4201 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4202 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4203 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
4205 %a = load <6 x half>, ptr %x
4206 %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
4207 store <6 x half> %b, ptr %x
; ceil_v4f32: llvm.ceil on <4 x float>, loaded from and stored back to %x.
; Lanes with |x| below the lui 307200 threshold (0x4B000000, the f32 bit
; pattern of 2^23) are converted to integer and back with frm temporarily set
; to 3 (RUP); fsrm a1 puts the saved rounding mode back.
4211 define void @ceil_v4f32(ptr %x) {
4212 ; CHECK-LABEL: ceil_v4f32:
4214 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4215 ; CHECK-NEXT: vle32.v v8, (a0)
4216 ; CHECK-NEXT: lui a1, 307200
4217 ; CHECK-NEXT: fmv.w.x fa5, a1
4218 ; CHECK-NEXT: vfabs.v v9, v8
4219 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4220 ; CHECK-NEXT: fsrmi a1, 3
4221 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4222 ; CHECK-NEXT: fsrm a1
4223 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4224 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
4225 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4226 ; CHECK-NEXT: vse32.v v8, (a0)
4228 %a = load <4 x float>, ptr %x
4229 %b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
4230 store <4 x float> %b, ptr %x
; ceil_v2f64: llvm.ceil on <2 x double>, loaded from and stored back to %x.
; The threshold comes from constant-pool entry .LCPI180_0 via fld; the masked
; float->int conversion runs with frm=3 (fsrmi a1, 3), restored by fsrm a1.
4234 define void @ceil_v2f64(ptr %x) {
4235 ; CHECK-LABEL: ceil_v2f64:
4237 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4238 ; CHECK-NEXT: vle64.v v8, (a0)
4239 ; CHECK-NEXT: lui a1, %hi(.LCPI180_0)
4240 ; CHECK-NEXT: fld fa5, %lo(.LCPI180_0)(a1)
4241 ; CHECK-NEXT: vfabs.v v9, v8
4242 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4243 ; CHECK-NEXT: fsrmi a1, 3
4244 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4245 ; CHECK-NEXT: fsrm a1
4246 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4247 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
4248 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4249 ; CHECK-NEXT: vse64.v v8, (a0)
4251 %a = load <2 x double>, ptr %x
4252 %b = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a)
4253 store <2 x double> %b, ptr %x
; floor_v8bf16: llvm.floor on <8 x bfloat>, loaded from and stored back to %x.
; The input is widened to f32 (vfwcvtbf16.f.f.v), rounded at e32, and narrowed
; with vfncvtbf16.f.f.w. fsrmi a1, 2 writes frm=2 (RDN, round down, per the
; RISC-V F spec) around the float->int conversion; fsrm a1 restores it.
4257 define void @floor_v8bf16(ptr %x) {
4258 ; CHECK-LABEL: floor_v8bf16:
4260 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4261 ; CHECK-NEXT: vle16.v v8, (a0)
4262 ; CHECK-NEXT: lui a1, 307200
4263 ; CHECK-NEXT: fmv.w.x fa5, a1
4264 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4265 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4266 ; CHECK-NEXT: vfabs.v v8, v10
4267 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
4268 ; CHECK-NEXT: fsrmi a1, 2
4269 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4270 ; CHECK-NEXT: fsrm a1
4271 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4272 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4273 ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4274 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4275 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4276 ; CHECK-NEXT: vse16.v v8, (a0)
4278 %a = load <8 x bfloat>, ptr %x
4279 %b = call <8 x bfloat> @llvm.floor.v8bf16(<8 x bfloat> %a)
4280 store <8 x bfloat> %b, ptr %x
; floor_v6bf16: llvm.floor on <6 x bfloat>, loaded from and stored back to %x.
; The load uses VL=6, the widening and mask computation run at VL=8, and the
; masked conversions under frm=2 (fsrmi a1, 2) switch back to VL=6.
4284 define void @floor_v6bf16(ptr %x) {
4285 ; CHECK-LABEL: floor_v6bf16:
4287 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4288 ; CHECK-NEXT: vle16.v v8, (a0)
4289 ; CHECK-NEXT: lui a1, 307200
4290 ; CHECK-NEXT: fmv.w.x fa5, a1
4291 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4292 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4293 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4294 ; CHECK-NEXT: vfabs.v v8, v10
4295 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
4296 ; CHECK-NEXT: fsrmi a1, 2
4297 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
4298 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4299 ; CHECK-NEXT: fsrm a1
4300 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4301 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4302 ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4303 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4304 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4305 ; CHECK-NEXT: vse16.v v8, (a0)
4307 %a = load <6 x bfloat>, ptr %x
4308 %b = call <6 x bfloat> @llvm.floor.v6bf16(<6 x bfloat> %a)
4309 store <6 x bfloat> %b, ptr %x
; floor_v8f16: llvm.floor on <8 x half>, loaded from and stored back to %x.
; With +zvfh the rounding runs at e16 against a threshold loaded from
; constant-pool entry .LCPI183_0; with +zvfhmin the vector is widened to f32,
; rounded at e32, and narrowed with vfncvt.f.f.w. Both bracket the float->int
; conversion with fsrmi a1, 2 / fsrm a1.
4313 define void @floor_v8f16(ptr %x) {
4314 ; ZVFH-LABEL: floor_v8f16:
4316 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4317 ; ZVFH-NEXT: vle16.v v8, (a0)
4318 ; ZVFH-NEXT: lui a1, %hi(.LCPI183_0)
4319 ; ZVFH-NEXT: flh fa5, %lo(.LCPI183_0)(a1)
4320 ; ZVFH-NEXT: vfabs.v v9, v8
4321 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4322 ; ZVFH-NEXT: fsrmi a1, 2
4323 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4324 ; ZVFH-NEXT: fsrm a1
4325 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4326 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4327 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4328 ; ZVFH-NEXT: vse16.v v8, (a0)
4331 ; ZVFHMIN-LABEL: floor_v8f16:
4333 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4334 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4335 ; ZVFHMIN-NEXT: lui a1, 307200
4336 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4337 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4338 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4339 ; ZVFHMIN-NEXT: vfabs.v v8, v10
4340 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4341 ; ZVFHMIN-NEXT: fsrmi a1, 2
4342 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4343 ; ZVFHMIN-NEXT: fsrm a1
4344 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4345 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4346 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4347 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4348 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4349 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
4351 %a = load <8 x half>, ptr %x
4352 %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a)
4353 store <8 x half> %b, ptr %x
; floor_v6f16: llvm.floor on <6 x half>, loaded from and stored back to %x.
; Both the +zvfh (e16) and +zvfhmin (widen to e32) sequences load at VL=6,
; compute the |x| < threshold mask at VL=8, then drop back to VL=6 for the
; masked conversions performed under frm=2 (fsrmi a1, 2).
4357 define void @floor_v6f16(ptr %x) {
4358 ; ZVFH-LABEL: floor_v6f16:
4360 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4361 ; ZVFH-NEXT: vle16.v v8, (a0)
4362 ; ZVFH-NEXT: lui a1, %hi(.LCPI184_0)
4363 ; ZVFH-NEXT: flh fa5, %lo(.LCPI184_0)(a1)
4364 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4365 ; ZVFH-NEXT: vfabs.v v9, v8
4366 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4367 ; ZVFH-NEXT: fsrmi a1, 2
4368 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4369 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4370 ; ZVFH-NEXT: fsrm a1
4371 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4372 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4373 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4374 ; ZVFH-NEXT: vse16.v v8, (a0)
4377 ; ZVFHMIN-LABEL: floor_v6f16:
4379 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4380 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4381 ; ZVFHMIN-NEXT: lui a1, 307200
4382 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4383 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4384 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4385 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4386 ; ZVFHMIN-NEXT: vfabs.v v8, v10
4387 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4388 ; ZVFHMIN-NEXT: fsrmi a1, 2
4389 ; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
4390 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4391 ; ZVFHMIN-NEXT: fsrm a1
4392 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4393 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4394 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4395 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4396 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4397 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
4399 %a = load <6 x half>, ptr %x
4400 %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
4401 store <6 x half> %b, ptr %x
; floor_v4f32: llvm.floor on <4 x float>, loaded from and stored back to %x.
; Lanes with |x| below the lui 307200 threshold (0x4B000000, the f32 bit
; pattern of 2^23) are converted to integer and back with frm temporarily set
; to 2 (RDN); fsrm a1 puts the saved rounding mode back.
4405 define void @floor_v4f32(ptr %x) {
4406 ; CHECK-LABEL: floor_v4f32:
4408 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4409 ; CHECK-NEXT: vle32.v v8, (a0)
4410 ; CHECK-NEXT: lui a1, 307200
4411 ; CHECK-NEXT: fmv.w.x fa5, a1
4412 ; CHECK-NEXT: vfabs.v v9, v8
4413 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4414 ; CHECK-NEXT: fsrmi a1, 2
4415 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4416 ; CHECK-NEXT: fsrm a1
4417 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4418 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
4419 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4420 ; CHECK-NEXT: vse32.v v8, (a0)
4422 %a = load <4 x float>, ptr %x
4423 %b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
4424 store <4 x float> %b, ptr %x
; floor_v2f64: llvm.floor on <2 x double>, loaded from and stored back to %x.
; The threshold comes from constant-pool entry .LCPI186_0 via fld; the masked
; float->int conversion runs with frm=2 (fsrmi a1, 2), restored by fsrm a1.
4428 define void @floor_v2f64(ptr %x) {
4429 ; CHECK-LABEL: floor_v2f64:
4431 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4432 ; CHECK-NEXT: vle64.v v8, (a0)
4433 ; CHECK-NEXT: lui a1, %hi(.LCPI186_0)
4434 ; CHECK-NEXT: fld fa5, %lo(.LCPI186_0)(a1)
4435 ; CHECK-NEXT: vfabs.v v9, v8
4436 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4437 ; CHECK-NEXT: fsrmi a1, 2
4438 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4439 ; CHECK-NEXT: fsrm a1
4440 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4441 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
4442 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4443 ; CHECK-NEXT: vse64.v v8, (a0)
4445 %a = load <2 x double>, ptr %x
4446 %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
4447 store <2 x double> %b, ptr %x
; round_v8bf16: llvm.round on <8 x bfloat>, loaded from and stored back to %x.
; The input is widened to f32 (vfwcvtbf16.f.f.v), rounded at e32, and narrowed
; with vfncvtbf16.f.f.w. fsrmi a1, 4 writes frm=4 (RMM, round to nearest with
; ties to max magnitude, per the RISC-V F spec) around the float->int
; conversion; fsrm a1 restores the previous mode.
4451 define void @round_v8bf16(ptr %x) {
4452 ; CHECK-LABEL: round_v8bf16:
4454 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4455 ; CHECK-NEXT: vle16.v v8, (a0)
4456 ; CHECK-NEXT: lui a1, 307200
4457 ; CHECK-NEXT: fmv.w.x fa5, a1
4458 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4459 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4460 ; CHECK-NEXT: vfabs.v v8, v10
4461 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
4462 ; CHECK-NEXT: fsrmi a1, 4
4463 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4464 ; CHECK-NEXT: fsrm a1
4465 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4466 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4467 ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4468 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4469 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4470 ; CHECK-NEXT: vse16.v v8, (a0)
4472 %a = load <8 x bfloat>, ptr %x
4473 %b = call <8 x bfloat> @llvm.round.v8bf16(<8 x bfloat> %a)
4474 store <8 x bfloat> %b, ptr %x
; round_v6bf16: llvm.round on <6 x bfloat>, loaded from and stored back to %x.
; The load uses VL=6, the widening and mask computation run at VL=8, and the
; masked conversions under frm=4 (fsrmi a1, 4) switch back to VL=6.
4478 define void @round_v6bf16(ptr %x) {
4479 ; CHECK-LABEL: round_v6bf16:
4481 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4482 ; CHECK-NEXT: vle16.v v8, (a0)
4483 ; CHECK-NEXT: lui a1, 307200
4484 ; CHECK-NEXT: fmv.w.x fa5, a1
4485 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4486 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4487 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4488 ; CHECK-NEXT: vfabs.v v8, v10
4489 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
4490 ; CHECK-NEXT: fsrmi a1, 4
4491 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
4492 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4493 ; CHECK-NEXT: fsrm a1
4494 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4495 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4496 ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4497 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4498 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4499 ; CHECK-NEXT: vse16.v v8, (a0)
4501 %a = load <6 x bfloat>, ptr %x
4502 %b = call <6 x bfloat> @llvm.round.v6bf16(<6 x bfloat> %a)
4503 store <6 x bfloat> %b, ptr %x
; round_v8f16: llvm.round on <8 x half>, loaded from and stored back to %x.
; With +zvfh the rounding runs at e16 against a threshold loaded from
; constant-pool entry .LCPI189_0; with +zvfhmin the vector is widened to f32,
; rounded at e32, and narrowed with vfncvt.f.f.w. Both bracket the float->int
; conversion with fsrmi a1, 4 / fsrm a1.
4507 define void @round_v8f16(ptr %x) {
4508 ; ZVFH-LABEL: round_v8f16:
4510 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4511 ; ZVFH-NEXT: vle16.v v8, (a0)
4512 ; ZVFH-NEXT: lui a1, %hi(.LCPI189_0)
4513 ; ZVFH-NEXT: flh fa5, %lo(.LCPI189_0)(a1)
4514 ; ZVFH-NEXT: vfabs.v v9, v8
4515 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4516 ; ZVFH-NEXT: fsrmi a1, 4
4517 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4518 ; ZVFH-NEXT: fsrm a1
4519 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4520 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4521 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4522 ; ZVFH-NEXT: vse16.v v8, (a0)
4525 ; ZVFHMIN-LABEL: round_v8f16:
4527 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4528 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4529 ; ZVFHMIN-NEXT: lui a1, 307200
4530 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4531 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4532 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4533 ; ZVFHMIN-NEXT: vfabs.v v8, v10
4534 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4535 ; ZVFHMIN-NEXT: fsrmi a1, 4
4536 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4537 ; ZVFHMIN-NEXT: fsrm a1
4538 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4539 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4540 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4541 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4542 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4543 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
4545 %a = load <8 x half>, ptr %x
4546 %b = call <8 x half> @llvm.round.v8f16(<8 x half> %a)
4547 store <8 x half> %b, ptr %x
; round_v6f16: llvm.round on <6 x half>, loaded from and stored back to %x.
; Both the +zvfh (e16) and +zvfhmin (widen to e32) sequences load at VL=6,
; compute the |x| < threshold mask at VL=8, then drop back to VL=6 for the
; masked conversions performed under frm=4 (fsrmi a1, 4).
4551 define void @round_v6f16(ptr %x) {
4552 ; ZVFH-LABEL: round_v6f16:
4554 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4555 ; ZVFH-NEXT: vle16.v v8, (a0)
4556 ; ZVFH-NEXT: lui a1, %hi(.LCPI190_0)
4557 ; ZVFH-NEXT: flh fa5, %lo(.LCPI190_0)(a1)
4558 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4559 ; ZVFH-NEXT: vfabs.v v9, v8
4560 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4561 ; ZVFH-NEXT: fsrmi a1, 4
4562 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4563 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4564 ; ZVFH-NEXT: fsrm a1
4565 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4566 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4567 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4568 ; ZVFH-NEXT: vse16.v v8, (a0)
4571 ; ZVFHMIN-LABEL: round_v6f16:
4573 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4574 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4575 ; ZVFHMIN-NEXT: lui a1, 307200
4576 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4577 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4578 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4579 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4580 ; ZVFHMIN-NEXT: vfabs.v v8, v10
4581 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4582 ; ZVFHMIN-NEXT: fsrmi a1, 4
4583 ; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
4584 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4585 ; ZVFHMIN-NEXT: fsrm a1
4586 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4587 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4588 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4589 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4590 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4591 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
4593 %a = load <6 x half>, ptr %x
4594 %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
4595 store <6 x half> %b, ptr %x
; round_v4f32: llvm.round on <4 x float>, loaded from and stored back to %x.
; Lanes with |x| below the lui 307200 threshold (0x4B000000, the f32 bit
; pattern of 2^23) are converted to integer and back with frm temporarily set
; to 4 (RMM); fsrm a1 puts the saved rounding mode back.
4599 define void @round_v4f32(ptr %x) {
4600 ; CHECK-LABEL: round_v4f32:
4602 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4603 ; CHECK-NEXT: vle32.v v8, (a0)
4604 ; CHECK-NEXT: lui a1, 307200
4605 ; CHECK-NEXT: fmv.w.x fa5, a1
4606 ; CHECK-NEXT: vfabs.v v9, v8
4607 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4608 ; CHECK-NEXT: fsrmi a1, 4
4609 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4610 ; CHECK-NEXT: fsrm a1
4611 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4612 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
4613 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4614 ; CHECK-NEXT: vse32.v v8, (a0)
4616 %a = load <4 x float>, ptr %x
4617 %b = call <4 x float> @llvm.round.v4f32(<4 x float> %a)
4618 store <4 x float> %b, ptr %x
; round_v2f64: llvm.round on <2 x double>, loaded from and stored back to %x.
; The threshold comes from constant-pool entry .LCPI192_0 via fld; the masked
; float->int conversion runs with frm=4 (fsrmi a1, 4), restored by fsrm a1.
4622 define void @round_v2f64(ptr %x) {
4623 ; CHECK-LABEL: round_v2f64:
4625 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4626 ; CHECK-NEXT: vle64.v v8, (a0)
4627 ; CHECK-NEXT: lui a1, %hi(.LCPI192_0)
4628 ; CHECK-NEXT: fld fa5, %lo(.LCPI192_0)(a1)
4629 ; CHECK-NEXT: vfabs.v v9, v8
4630 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4631 ; CHECK-NEXT: fsrmi a1, 4
4632 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4633 ; CHECK-NEXT: fsrm a1
4634 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4635 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
4636 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4637 ; CHECK-NEXT: vse64.v v8, (a0)
4639 %a = load <2 x double>, ptr %x
4640 %b = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
4641 store <2 x double> %b, ptr %x
; rint_v8bf16: llvm.rint on <8 x bfloat>, loaded from and stored back to %x.
; The input is widened to f32 (vfwcvtbf16.f.f.v), rounded at e32, and narrowed
; with vfncvtbf16.f.f.w. No fsrmi/fsrm pair is emitted: the expected code
; never writes frm, so vfcvt.x.f.v uses whatever rounding mode is already set.
4645 define void @rint_v8bf16(ptr %x) {
4646 ; CHECK-LABEL: rint_v8bf16:
4648 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4649 ; CHECK-NEXT: vle16.v v8, (a0)
4650 ; CHECK-NEXT: lui a1, 307200
4651 ; CHECK-NEXT: fmv.w.x fa5, a1
4652 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4653 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4654 ; CHECK-NEXT: vfabs.v v8, v10
4655 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
4656 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4657 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4658 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4659 ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4660 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4661 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4662 ; CHECK-NEXT: vse16.v v8, (a0)
4664 %a = load <8 x bfloat>, ptr %x
4665 %b = call <8 x bfloat> @llvm.rint.v8bf16(<8 x bfloat> %a)
4666 store <8 x bfloat> %b, ptr %x
; rint_v8f16: llvm.rint on <8 x half>, loaded from and stored back to %x.
; With +zvfh the rounding runs at e16 against a threshold loaded from
; constant-pool entry .LCPI194_0; with +zvfhmin the vector is widened to f32,
; rounded at e32, and narrowed with vfncvt.f.f.w. Neither sequence writes frm.
4670 define void @rint_v8f16(ptr %x) {
4671 ; ZVFH-LABEL: rint_v8f16:
4673 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4674 ; ZVFH-NEXT: vle16.v v8, (a0)
4675 ; ZVFH-NEXT: lui a1, %hi(.LCPI194_0)
4676 ; ZVFH-NEXT: flh fa5, %lo(.LCPI194_0)(a1)
4677 ; ZVFH-NEXT: vfabs.v v9, v8
4678 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4679 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4680 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4681 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4682 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4683 ; ZVFH-NEXT: vse16.v v8, (a0)
4686 ; ZVFHMIN-LABEL: rint_v8f16:
4688 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4689 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4690 ; ZVFHMIN-NEXT: lui a1, 307200
4691 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4692 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4693 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4694 ; ZVFHMIN-NEXT: vfabs.v v8, v10
4695 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4696 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4697 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4698 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4699 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4700 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4701 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4702 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
4704 %a = load <8 x half>, ptr %x
4705 %b = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
4706 store <8 x half> %b, ptr %x
; rint_v4f32: llvm.rint on <4 x float>, loaded from and stored back to %x.
; Lanes with |x| below the lui 307200 threshold (0x4B000000, the f32 bit
; pattern of 2^23) go through vfcvt.x.f.v / vfcvt.f.x.v without any write to
; frm; vfsgnj.vv then re-applies the input's sign under the same mask.
4710 define void @rint_v4f32(ptr %x) {
4711 ; CHECK-LABEL: rint_v4f32:
4713 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4714 ; CHECK-NEXT: vle32.v v8, (a0)
4715 ; CHECK-NEXT: lui a1, 307200
4716 ; CHECK-NEXT: fmv.w.x fa5, a1
4717 ; CHECK-NEXT: vfabs.v v9, v8
4718 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4719 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4720 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4721 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
4722 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4723 ; CHECK-NEXT: vse32.v v8, (a0)
4725 %a = load <4 x float>, ptr %x
4726 %b = call <4 x float> @llvm.rint.v4f32(<4 x float> %a)
4727 store <4 x float> %b, ptr %x
; rint_v2f64: llvm.rint on <2 x double>, loaded from and stored back to %x.
; The threshold comes from constant-pool entry .LCPI196_0 via fld; the masked
; vfcvt.x.f.v / vfcvt.f.x.v pair runs without any write to frm.
4731 define void @rint_v2f64(ptr %x) {
4732 ; CHECK-LABEL: rint_v2f64:
4734 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4735 ; CHECK-NEXT: vle64.v v8, (a0)
4736 ; CHECK-NEXT: lui a1, %hi(.LCPI196_0)
4737 ; CHECK-NEXT: fld fa5, %lo(.LCPI196_0)(a1)
4738 ; CHECK-NEXT: vfabs.v v9, v8
4739 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4740 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4741 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4742 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
4743 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4744 ; CHECK-NEXT: vse64.v v8, (a0)
4746 %a = load <2 x double>, ptr %x
4747 %b = call <2 x double> @llvm.rint.v2f64(<2 x double> %a)
4748 store <2 x double> %b, ptr %x
; nearbyint_v8bf16: llvm.nearbyint on <8 x bfloat>, loaded from and stored
; back to %x. The input is widened to f32, rounded at e32, and narrowed with
; vfncvtbf16.f.f.w. frflags a1 saves the FP exception flags before the two
; conversions and fsflags a1 writes the saved value back afterwards, so the
; conversions leave fflags as they found it. frm is not written.
4752 define void @nearbyint_v8bf16(ptr %x) {
4753 ; CHECK-LABEL: nearbyint_v8bf16:
4755 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4756 ; CHECK-NEXT: vle16.v v8, (a0)
4757 ; CHECK-NEXT: lui a1, 307200
4758 ; CHECK-NEXT: fmv.w.x fa5, a1
4759 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4760 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4761 ; CHECK-NEXT: vfabs.v v8, v10
4762 ; CHECK-NEXT: vmflt.vf v0, v8, fa5
4763 ; CHECK-NEXT: frflags a1
4764 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4765 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4766 ; CHECK-NEXT: fsflags a1
4767 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4768 ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4769 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4770 ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4771 ; CHECK-NEXT: vse16.v v8, (a0)
4773 %a = load <8 x bfloat>, ptr %x
4774 %b = call <8 x bfloat> @llvm.nearbyint.v8bf16(<8 x bfloat> %a)
4775 store <8 x bfloat> %b, ptr %x
; nearbyint_v8f16: llvm.nearbyint on <8 x half>, loaded from and stored back
; to %x. With +zvfh the rounding runs at e16 against a threshold loaded from
; constant-pool entry .LCPI198_0; with +zvfhmin the vector is widened to f32,
; rounded at e32, and narrowed with vfncvt.f.f.w. Both save fflags with
; frflags a1 before the conversions and restore it with fsflags a1 after.
4779 define void @nearbyint_v8f16(ptr %x) {
4780 ; ZVFH-LABEL: nearbyint_v8f16:
4782 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4783 ; ZVFH-NEXT: vle16.v v8, (a0)
4784 ; ZVFH-NEXT: lui a1, %hi(.LCPI198_0)
4785 ; ZVFH-NEXT: flh fa5, %lo(.LCPI198_0)(a1)
4786 ; ZVFH-NEXT: vfabs.v v9, v8
4787 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4788 ; ZVFH-NEXT: frflags a1
4789 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4790 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4791 ; ZVFH-NEXT: fsflags a1
4792 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4793 ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4794 ; ZVFH-NEXT: vse16.v v8, (a0)
4797 ; ZVFHMIN-LABEL: nearbyint_v8f16:
4799 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4800 ; ZVFHMIN-NEXT: vle16.v v8, (a0)
4801 ; ZVFHMIN-NEXT: lui a1, 307200
4802 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4803 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4804 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4805 ; ZVFHMIN-NEXT: vfabs.v v8, v10
4806 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4807 ; ZVFHMIN-NEXT: frflags a1
4808 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4809 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4810 ; ZVFHMIN-NEXT: fsflags a1
4811 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4812 ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4813 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4814 ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4815 ; ZVFHMIN-NEXT: vse16.v v8, (a0)
4817 %a = load <8 x half>, ptr %x
4818 %b = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
4819 store <8 x half> %b, ptr %x
; nearbyint_v4f32: llvm.nearbyint on <4 x float>, loaded from and stored back
; to %x. Lanes with |x| below the lui 307200 threshold (0x4B000000, the f32
; bit pattern of 2^23) are converted to integer and back; frflags a1 /
; fsflags a1 save and restore fflags around those two conversions.
4823 define void @nearbyint_v4f32(ptr %x) {
4824 ; CHECK-LABEL: nearbyint_v4f32:
4826 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4827 ; CHECK-NEXT: vle32.v v8, (a0)
4828 ; CHECK-NEXT: lui a1, 307200
4829 ; CHECK-NEXT: fmv.w.x fa5, a1
4830 ; CHECK-NEXT: vfabs.v v9, v8
4831 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4832 ; CHECK-NEXT: frflags a1
4833 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4834 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4835 ; CHECK-NEXT: fsflags a1
4836 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
4837 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4838 ; CHECK-NEXT: vse32.v v8, (a0)
4840 %a = load <4 x float>, ptr %x
4841 %b = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a)
4842 store <4 x float> %b, ptr %x
; nearbyint_v2f64: llvm.nearbyint on <2 x double>, loaded from and stored back
; to %x. The threshold comes from constant-pool entry .LCPI200_0 via fld;
; frflags a1 / fsflags a1 save and restore fflags around the masked
; vfcvt.x.f.v / vfcvt.f.x.v pair.
4846 define void @nearbyint_v2f64(ptr %x) {
4847 ; CHECK-LABEL: nearbyint_v2f64:
4849 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4850 ; CHECK-NEXT: vle64.v v8, (a0)
4851 ; CHECK-NEXT: lui a1, %hi(.LCPI200_0)
4852 ; CHECK-NEXT: fld fa5, %lo(.LCPI200_0)(a1)
4853 ; CHECK-NEXT: vfabs.v v9, v8
4854 ; CHECK-NEXT: vmflt.vf v0, v9, fa5
4855 ; CHECK-NEXT: frflags a1
4856 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4857 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4858 ; CHECK-NEXT: fsflags a1
4859 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
4860 ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4861 ; CHECK-NEXT: vse64.v v8, (a0)
4863 %a = load <2 x double>, ptr %x
4864 %b = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
4865 store <2 x double> %b, ptr %x
; fmuladd_v8bf16: llvm.fmuladd on three <8 x bfloat> vectors loaded from %x,
; %y and %z; the result is stored back to %x.
; The expected code is unfused: both multiplicands are widened to f32 and
; multiplied with vfmul.vv, the product is narrowed to bf16 and widened again,
; then added to the widened addend with vfadd.vv and narrowed for the store.
4869 define void @fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) {
4870 ; CHECK-LABEL: fmuladd_v8bf16:
4872 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4873 ; CHECK-NEXT: vle16.v v8, (a1)
4874 ; CHECK-NEXT: vle16.v v9, (a0)
4875 ; CHECK-NEXT: vle16.v v10, (a2)
4876 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
4877 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
4878 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4879 ; CHECK-NEXT: vfmul.vv v8, v14, v12
4880 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4881 ; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
4882 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
4883 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
4884 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4885 ; CHECK-NEXT: vfadd.vv v8, v8, v12
4886 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4887 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
4888 ; CHECK-NEXT: vse16.v v10, (a0)
4890 %a = load <8 x bfloat>, ptr %x
4891 %b = load <8 x bfloat>, ptr %y
4892 %c = load <8 x bfloat>, ptr %z
4893 %d = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c)
4894 store <8 x bfloat> %d, ptr %x
; fmuladd_v6bf16: llvm.fmuladd on three <6 x bfloat> vectors loaded from %x,
; %y and %z; the result is stored back to %x.
; The whole sequence stays at VL=6 (only SEW/LMUL change via vsetvli zero,
; zero). The multiply and add are separate f32 operations, with the product
; narrowed to bf16 and widened again in between.
4898 define void @fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) {
4899 ; CHECK-LABEL: fmuladd_v6bf16:
4901 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4902 ; CHECK-NEXT: vle16.v v8, (a1)
4903 ; CHECK-NEXT: vle16.v v9, (a0)
4904 ; CHECK-NEXT: vle16.v v10, (a2)
4905 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
4906 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
4907 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4908 ; CHECK-NEXT: vfmul.vv v8, v14, v12
4909 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4910 ; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
4911 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
4912 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
4913 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4914 ; CHECK-NEXT: vfadd.vv v8, v8, v12
4915 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4916 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
4917 ; CHECK-NEXT: vse16.v v10, (a0)
4919 %a = load <6 x bfloat>, ptr %x
4920 %b = load <6 x bfloat>, ptr %y
4921 %c = load <6 x bfloat>, ptr %z
4922 %d = call <6 x bfloat> @llvm.fmuladd.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %c)
4923 store <6 x bfloat> %d, ptr %x
; fmuladd_v8f16: llvm.fmuladd on three <8 x half> vectors loaded from %x, %y
; and %z; the result is stored back to %x.
; With +zvfh a single vfmacc.vv at e16 is expected. With +zvfhmin the multiply
; and add are separate f32 operations (vfmul.vv, vfadd.vv), with the product
; narrowed to f16 and widened again in between.
4927 define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
4928 ; ZVFH-LABEL: fmuladd_v8f16:
4930 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4931 ; ZVFH-NEXT: vle16.v v8, (a0)
4932 ; ZVFH-NEXT: vle16.v v9, (a1)
4933 ; ZVFH-NEXT: vle16.v v10, (a2)
4934 ; ZVFH-NEXT: vfmacc.vv v10, v8, v9
4935 ; ZVFH-NEXT: vse16.v v10, (a0)
4938 ; ZVFHMIN-LABEL: fmuladd_v8f16:
4940 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4941 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
4942 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
4943 ; ZVFHMIN-NEXT: vle16.v v10, (a2)
4944 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
4945 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
4946 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4947 ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
4948 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4949 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
4950 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
4951 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
4952 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4953 ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12
4954 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4955 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
4956 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
4958 %a = load <8 x half>, ptr %x
4959 %b = load <8 x half>, ptr %y
4960 %c = load <8 x half>, ptr %z
4961 %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
4962 store <8 x half> %d, ptr %x
; llvm.fmuladd on <6 x half>: the 6-element variant of the <8 x half> test.
; VL is set to 6 by the vsetivli; otherwise the expected sequences have the
; same shape: one vfmacc.vv with +zvfh, and a promoted-to-f32 vfmul.vv /
; vfadd.vv pair (with a narrowing round-trip between them) with only +zvfhmin.
4966 define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
4967 ; ZVFH-LABEL: fmuladd_v6f16:
4969 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4970 ; ZVFH-NEXT: vle16.v v8, (a0)
4971 ; ZVFH-NEXT: vle16.v v9, (a1)
4972 ; ZVFH-NEXT: vle16.v v10, (a2)
4973 ; ZVFH-NEXT: vfmacc.vv v10, v8, v9
4974 ; ZVFH-NEXT: vse16.v v10, (a0)
4977 ; ZVFHMIN-LABEL: fmuladd_v6f16:
4979 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4980 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
4981 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
4982 ; ZVFHMIN-NEXT: vle16.v v10, (a2)
4983 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
4984 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
4985 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4986 ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
4987 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4988 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
4989 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
4990 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
4991 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4992 ; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12
4993 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4994 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
4995 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
4997 %a = load <6 x half>, ptr %x
4998 %b = load <6 x half>, ptr %y
4999 %c = load <6 x half>, ptr %z
5000 %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
5001 store <6 x half> %d, ptr %x
; llvm.fmuladd on <4 x float>: computes %x * %y + %z and stores it back to %x.
; All RUN configurations share one expectation: a single vfmacc.vv at e32/m1
; that accumulates into the register holding the addend loaded from %z.
5005 define void @fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
5006 ; CHECK-LABEL: fmuladd_v4f32:
5008 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5009 ; CHECK-NEXT: vle32.v v8, (a0)
5010 ; CHECK-NEXT: vle32.v v9, (a1)
5011 ; CHECK-NEXT: vle32.v v10, (a2)
5012 ; CHECK-NEXT: vfmacc.vv v10, v8, v9
5013 ; CHECK-NEXT: vse32.v v10, (a0)
5015 %a = load <4 x float>, ptr %x
5016 %b = load <4 x float>, ptr %y
5017 %c = load <4 x float>, ptr %z
5018 %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
5019 store <4 x float> %d, ptr %x
; llvm.fmuladd on <2 x double>: computes %x * %y + %z and stores it back to %x.
; All RUN configurations share one expectation: a single vfmacc.vv at e64/m1
; that accumulates into the register holding the addend loaded from %z.
5023 define void @fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
5024 ; CHECK-LABEL: fmuladd_v2f64:
5026 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
5027 ; CHECK-NEXT: vle64.v v8, (a0)
5028 ; CHECK-NEXT: vle64.v v9, (a1)
5029 ; CHECK-NEXT: vle64.v v10, (a2)
5030 ; CHECK-NEXT: vfmacc.vv v10, v8, v9
5031 ; CHECK-NEXT: vse64.v v10, (a0)
5033 %a = load <2 x double>, ptr %x
5034 %b = load <2 x double>, ptr %y
5035 %c = load <2 x double>, ptr %z
5036 %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
5037 store <2 x double> %d, ptr %x
; llvm.fmuladd on <8 x bfloat> with a negated addend: %x * %y + (-%z), stored
; back to %x. The expected code contains no separate negate instruction: after
; the promoted f32 vfmul.vv and the bf16 narrowing round-trip of the product,
; the fneg is folded into a vfsub.vv of the widened %z from the widened product.
5041 define void @fmsub_fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) {
5042 ; CHECK-LABEL: fmsub_fmuladd_v8bf16:
5044 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5045 ; CHECK-NEXT: vle16.v v8, (a1)
5046 ; CHECK-NEXT: vle16.v v9, (a0)
5047 ; CHECK-NEXT: vle16.v v10, (a2)
5048 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
5049 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
5050 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5051 ; CHECK-NEXT: vfmul.vv v8, v14, v12
5052 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5053 ; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
5054 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
5055 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
5056 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5057 ; CHECK-NEXT: vfsub.vv v8, v8, v12
5058 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5059 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
5060 ; CHECK-NEXT: vse16.v v10, (a0)
5062 %a = load <8 x bfloat>, ptr %x
5063 %b = load <8 x bfloat>, ptr %y
5064 %c = load <8 x bfloat>, ptr %z
5065 %neg = fneg <8 x bfloat> %c
5066 %d = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %neg)
5067 store <8 x bfloat> %d, ptr %x
; llvm.fmuladd on <6 x bfloat> with a negated addend: %x * %y + (-%z), stored
; back to %x. This is the 6-element variant of the <8 x bfloat> fmsub test: VL
; is set to 6, and the fneg is expected to fold into the final vfsub.vv that
; follows the promoted f32 vfmul.vv and the bf16 narrowing round-trip.
5071 define void @fmsub_fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) {
5072 ; CHECK-LABEL: fmsub_fmuladd_v6bf16:
5074 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5075 ; CHECK-NEXT: vle16.v v8, (a1)
5076 ; CHECK-NEXT: vle16.v v9, (a0)
5077 ; CHECK-NEXT: vle16.v v10, (a2)
5078 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
5079 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
5080 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5081 ; CHECK-NEXT: vfmul.vv v8, v14, v12
5082 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5083 ; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
5084 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
5085 ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
5086 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5087 ; CHECK-NEXT: vfsub.vv v8, v8, v12
5088 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5089 ; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
5090 ; CHECK-NEXT: vse16.v v10, (a0)
5092 %a = load <6 x bfloat>, ptr %x
5093 %b = load <6 x bfloat>, ptr %y
5094 %c = load <6 x bfloat>, ptr %z
5095 %neg = fneg <6 x bfloat> %c
5096 %d = call <6 x bfloat> @llvm.fmuladd.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %neg)
5097 store <6 x bfloat> %d, ptr %x
; llvm.fmuladd on <8 x half> with a negated addend: %x * %y + (-%z), stored
; back to %x. Two lowerings are expected, depending on the RUN configuration:
;  - with +zvfh, the fneg folds into a single vfmsac.vv at e16;
;  - with only +zvfhmin, promotion to f32: vfmul.vv, a narrowing round-trip of
;    the product through f16, then vfsub.vv of the widened %z (no separate
;    negate instruction is expected).
5101 define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
5102 ; ZVFH-LABEL: fmsub_fmuladd_v8f16:
5104 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5105 ; ZVFH-NEXT: vle16.v v8, (a0)
5106 ; ZVFH-NEXT: vle16.v v9, (a1)
5107 ; ZVFH-NEXT: vle16.v v10, (a2)
5108 ; ZVFH-NEXT: vfmsac.vv v10, v8, v9
5109 ; ZVFH-NEXT: vse16.v v10, (a0)
5112 ; ZVFHMIN-LABEL: fmsub_fmuladd_v8f16:
5114 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5115 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
5116 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
5117 ; ZVFHMIN-NEXT: vle16.v v10, (a2)
5118 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
5119 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
5120 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5121 ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
5122 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5123 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
5124 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
5125 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
5126 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5127 ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12
5128 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5129 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
5130 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
5132 %a = load <8 x half>, ptr %x
5133 %b = load <8 x half>, ptr %y
5134 %c = load <8 x half>, ptr %z
5135 %neg = fneg <8 x half> %c
5136 %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
5137 store <8 x half> %d, ptr %x
; llvm.fmuladd on <6 x half> with a negated addend: %x * %y + (-%z), stored
; back to %x. This is the 6-element variant of the <8 x half> fmsub test: VL is
; set to 6; with +zvfh a single vfmsac.vv is expected, and with only +zvfhmin a
; promoted f32 vfmul.vv followed (after a narrowing round-trip) by vfsub.vv.
5141 define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
5142 ; ZVFH-LABEL: fmsub_fmuladd_v6f16:
5144 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5145 ; ZVFH-NEXT: vle16.v v8, (a0)
5146 ; ZVFH-NEXT: vle16.v v9, (a1)
5147 ; ZVFH-NEXT: vle16.v v10, (a2)
5148 ; ZVFH-NEXT: vfmsac.vv v10, v8, v9
5149 ; ZVFH-NEXT: vse16.v v10, (a0)
5152 ; ZVFHMIN-LABEL: fmsub_fmuladd_v6f16:
5154 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5155 ; ZVFHMIN-NEXT: vle16.v v8, (a1)
5156 ; ZVFHMIN-NEXT: vle16.v v9, (a0)
5157 ; ZVFHMIN-NEXT: vle16.v v10, (a2)
5158 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
5159 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
5160 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5161 ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
5162 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5163 ; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
5164 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
5165 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
5166 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5167 ; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12
5168 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5169 ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
5170 ; ZVFHMIN-NEXT: vse16.v v10, (a0)
5172 %a = load <6 x half>, ptr %x
5173 %b = load <6 x half>, ptr %y
5174 %c = load <6 x half>, ptr %z
5175 %neg = fneg <6 x half> %c
5176 %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
5177 store <6 x half> %d, ptr %x
; llvm.fmuladd on <4 x float> with a negated multiplicand: (-%x) * %y + %z,
; stored back to %x. The fneg of the first operand is expected to fold into a
; single vfnmsac.vv at e32/m1, with no separate negate instruction.
5181 define void @fnmsub_fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
5182 ; CHECK-LABEL: fnmsub_fmuladd_v4f32:
5184 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5185 ; CHECK-NEXT: vle32.v v8, (a0)
5186 ; CHECK-NEXT: vle32.v v9, (a1)
5187 ; CHECK-NEXT: vle32.v v10, (a2)
5188 ; CHECK-NEXT: vfnmsac.vv v10, v8, v9
5189 ; CHECK-NEXT: vse32.v v10, (a0)
5191 %a = load <4 x float>, ptr %x
5192 %b = load <4 x float>, ptr %y
5193 %c = load <4 x float>, ptr %z
5194 %neg = fneg <4 x float> %a
5195 %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
5196 store <4 x float> %d, ptr %x
; llvm.fmuladd on <2 x double> with both a multiplicand and the addend negated:
; %x * (-%y) + (-%z), stored back to %x. Both fneg operations are expected to
; fold into a single vfnmacc.vv at e64/m1, with no separate negate instruction.
5200 define void @fnmadd_fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
5201 ; CHECK-LABEL: fnmadd_fmuladd_v2f64:
5203 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
5204 ; CHECK-NEXT: vle64.v v8, (a0)
5205 ; CHECK-NEXT: vle64.v v9, (a1)
5206 ; CHECK-NEXT: vle64.v v10, (a2)
5207 ; CHECK-NEXT: vfnmacc.vv v10, v8, v9
5208 ; CHECK-NEXT: vse64.v v10, (a0)
5210 %a = load <2 x double>, ptr %x
5211 %b = load <2 x double>, ptr %y
5212 %c = load <2 x double>, ptr %z
5213 %neg = fneg <2 x double> %b
5214 %neg2 = fneg <2 x double> %c
5215 %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
5216 store <2 x double> %d, ptr %x