1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
3 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
4 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX
5 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc-unknown-linux -mattr=spe | FileCheck %s -check-prefix=SPE
7 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
8 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
9 declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
10 declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
12 declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
13 declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
14 declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata)
15 declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
17 declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
18 declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
19 declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
20 declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
22 declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
23 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
24 declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
25 declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
27 declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
28 declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
29 declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
30 declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
32 declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
33 declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
34 declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
35 declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
37 define float @fadd_f32(float %f1, float %f2) #0 {
38 ; CHECK-LABEL: fadd_f32:
40 ; CHECK-NEXT: xsaddsp f1, f1, f2
43 ; NOVSX-LABEL: fadd_f32:
45 ; NOVSX-NEXT: fadds f1, f1, f2
48 ; SPE-LABEL: fadd_f32:
50 ; SPE-NEXT: efsadd r3, r3, r4
52 %res = call float @llvm.experimental.constrained.fadd.f32(
54 metadata !"round.dynamic",
55 metadata !"fpexcept.strict") #0
59 define double @fadd_f64(double %f1, double %f2) #0 {
60 ; CHECK-LABEL: fadd_f64:
62 ; CHECK-NEXT: xsadddp f1, f1, f2
65 ; NOVSX-LABEL: fadd_f64:
67 ; NOVSX-NEXT: fadd f1, f1, f2
70 ; SPE-LABEL: fadd_f64:
72 ; SPE-NEXT: evmergelo r5, r5, r6
73 ; SPE-NEXT: evmergelo r3, r3, r4
74 ; SPE-NEXT: efdadd r4, r3, r5
75 ; SPE-NEXT: evmergehi r3, r4, r4
77 %res = call double @llvm.experimental.constrained.fadd.f64(
78 double %f1, double %f2,
79 metadata !"round.dynamic",
80 metadata !"fpexcept.strict") #0
84 define <4 x float> @fadd_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
85 ; CHECK-LABEL: fadd_v4f32:
87 ; CHECK-NEXT: xvaddsp v2, v2, v3
90 ; NOVSX-LABEL: fadd_v4f32:
92 ; NOVSX-NEXT: addi r3, r1, -32
93 ; NOVSX-NEXT: stvx v3, 0, r3
94 ; NOVSX-NEXT: addi r3, r1, -48
95 ; NOVSX-NEXT: stvx v2, 0, r3
96 ; NOVSX-NEXT: addi r3, r1, -16
97 ; NOVSX-NEXT: lfs f0, -20(r1)
98 ; NOVSX-NEXT: lfs f1, -36(r1)
99 ; NOVSX-NEXT: fadds f0, f1, f0
100 ; NOVSX-NEXT: lfs f1, -40(r1)
101 ; NOVSX-NEXT: stfs f0, -4(r1)
102 ; NOVSX-NEXT: lfs f0, -24(r1)
103 ; NOVSX-NEXT: fadds f0, f1, f0
104 ; NOVSX-NEXT: lfs f1, -44(r1)
105 ; NOVSX-NEXT: stfs f0, -8(r1)
106 ; NOVSX-NEXT: lfs f0, -28(r1)
107 ; NOVSX-NEXT: fadds f0, f1, f0
108 ; NOVSX-NEXT: lfs f1, -48(r1)
109 ; NOVSX-NEXT: stfs f0, -12(r1)
110 ; NOVSX-NEXT: lfs f0, -32(r1)
111 ; NOVSX-NEXT: fadds f0, f1, f0
112 ; NOVSX-NEXT: stfs f0, -16(r1)
113 ; NOVSX-NEXT: lvx v2, 0, r3
116 ; SPE-LABEL: fadd_v4f32:
118 ; SPE-NEXT: efsadd r6, r6, r10
119 ; SPE-NEXT: efsadd r5, r5, r9
120 ; SPE-NEXT: efsadd r4, r4, r8
121 ; SPE-NEXT: efsadd r3, r3, r7
123 %res = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(
124 <4 x float> %vf1, <4 x float> %vf2,
125 metadata !"round.dynamic",
126 metadata !"fpexcept.strict") #0
130 define <2 x double> @fadd_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
131 ; CHECK-LABEL: fadd_v2f64:
133 ; CHECK-NEXT: xvadddp v2, v2, v3
136 ; NOVSX-LABEL: fadd_v2f64:
138 ; NOVSX-NEXT: fadd f2, f2, f4
139 ; NOVSX-NEXT: fadd f1, f1, f3
142 ; SPE-LABEL: fadd_v2f64:
144 ; SPE-NEXT: evldd r4, 8(r1)
145 ; SPE-NEXT: evmergelo r7, r7, r8
146 ; SPE-NEXT: evmergelo r8, r9, r10
148 ; SPE-NEXT: evmergelo r5, r5, r6
149 ; SPE-NEXT: efdadd r4, r7, r4
150 ; SPE-NEXT: evstddx r4, r3, r9
151 ; SPE-NEXT: efdadd r4, r5, r8
152 ; SPE-NEXT: evstdd r4, 0(r3)
154 %res = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
155 <2 x double> %vf1, <2 x double> %vf2,
156 metadata !"round.dynamic",
157 metadata !"fpexcept.strict") #0
158 ret <2 x double> %res
161 define float @fsub_f32(float %f1, float %f2) #0 {
162 ; CHECK-LABEL: fsub_f32:
164 ; CHECK-NEXT: xssubsp f1, f1, f2
167 ; NOVSX-LABEL: fsub_f32:
169 ; NOVSX-NEXT: fsubs f1, f1, f2
172 ; SPE-LABEL: fsub_f32:
174 ; SPE-NEXT: efssub r3, r3, r4
177 %res = call float @llvm.experimental.constrained.fsub.f32(
178 float %f1, float %f2,
179 metadata !"round.dynamic",
180 metadata !"fpexcept.strict") #0
184 define double @fsub_f64(double %f1, double %f2) #0 {
185 ; CHECK-LABEL: fsub_f64:
187 ; CHECK-NEXT: xssubdp f1, f1, f2
190 ; NOVSX-LABEL: fsub_f64:
192 ; NOVSX-NEXT: fsub f1, f1, f2
195 ; SPE-LABEL: fsub_f64:
197 ; SPE-NEXT: evmergelo r5, r5, r6
198 ; SPE-NEXT: evmergelo r3, r3, r4
199 ; SPE-NEXT: efdsub r4, r3, r5
200 ; SPE-NEXT: evmergehi r3, r4, r4
203 %res = call double @llvm.experimental.constrained.fsub.f64(
204 double %f1, double %f2,
205 metadata !"round.dynamic",
206 metadata !"fpexcept.strict") #0
210 define <4 x float> @fsub_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
211 ; CHECK-LABEL: fsub_v4f32:
213 ; CHECK-NEXT: xvsubsp v2, v2, v3
216 ; NOVSX-LABEL: fsub_v4f32:
218 ; NOVSX-NEXT: addi r3, r1, -32
219 ; NOVSX-NEXT: stvx v3, 0, r3
220 ; NOVSX-NEXT: addi r3, r1, -48
221 ; NOVSX-NEXT: stvx v2, 0, r3
222 ; NOVSX-NEXT: addi r3, r1, -16
223 ; NOVSX-NEXT: lfs f0, -20(r1)
224 ; NOVSX-NEXT: lfs f1, -36(r1)
225 ; NOVSX-NEXT: fsubs f0, f1, f0
226 ; NOVSX-NEXT: lfs f1, -40(r1)
227 ; NOVSX-NEXT: stfs f0, -4(r1)
228 ; NOVSX-NEXT: lfs f0, -24(r1)
229 ; NOVSX-NEXT: fsubs f0, f1, f0
230 ; NOVSX-NEXT: lfs f1, -44(r1)
231 ; NOVSX-NEXT: stfs f0, -8(r1)
232 ; NOVSX-NEXT: lfs f0, -28(r1)
233 ; NOVSX-NEXT: fsubs f0, f1, f0
234 ; NOVSX-NEXT: lfs f1, -48(r1)
235 ; NOVSX-NEXT: stfs f0, -12(r1)
236 ; NOVSX-NEXT: lfs f0, -32(r1)
237 ; NOVSX-NEXT: fsubs f0, f1, f0
238 ; NOVSX-NEXT: stfs f0, -16(r1)
239 ; NOVSX-NEXT: lvx v2, 0, r3
242 ; SPE-LABEL: fsub_v4f32:
244 ; SPE-NEXT: efssub r6, r6, r10
245 ; SPE-NEXT: efssub r5, r5, r9
246 ; SPE-NEXT: efssub r4, r4, r8
247 ; SPE-NEXT: efssub r3, r3, r7
249 %res = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(
250 <4 x float> %vf1, <4 x float> %vf2,
251 metadata !"round.dynamic",
252 metadata !"fpexcept.strict") #0
253 ret <4 x float> %res;
256 define <2 x double> @fsub_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
257 ; CHECK-LABEL: fsub_v2f64:
259 ; CHECK-NEXT: xvsubdp v2, v2, v3
262 ; NOVSX-LABEL: fsub_v2f64:
264 ; NOVSX-NEXT: fsub f2, f2, f4
265 ; NOVSX-NEXT: fsub f1, f1, f3
268 ; SPE-LABEL: fsub_v2f64:
270 ; SPE-NEXT: evldd r4, 8(r1)
271 ; SPE-NEXT: evmergelo r7, r7, r8
272 ; SPE-NEXT: evmergelo r8, r9, r10
274 ; SPE-NEXT: evmergelo r5, r5, r6
275 ; SPE-NEXT: efdsub r4, r7, r4
276 ; SPE-NEXT: evstddx r4, r3, r9
277 ; SPE-NEXT: efdsub r4, r5, r8
278 ; SPE-NEXT: evstdd r4, 0(r3)
280 %res = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
281 <2 x double> %vf1, <2 x double> %vf2,
282 metadata !"round.dynamic",
283 metadata !"fpexcept.strict") #0
284 ret <2 x double> %res;
287 define float @fmul_f32(float %f1, float %f2) #0 {
288 ; CHECK-LABEL: fmul_f32:
290 ; CHECK-NEXT: xsmulsp f1, f1, f2
293 ; NOVSX-LABEL: fmul_f32:
295 ; NOVSX-NEXT: fmuls f1, f1, f2
298 ; SPE-LABEL: fmul_f32:
300 ; SPE-NEXT: efsmul r3, r3, r4
303 %res = call float @llvm.experimental.constrained.fmul.f32(
304 float %f1, float %f2,
305 metadata !"round.dynamic",
306 metadata !"fpexcept.strict") #0
310 define double @fmul_f64(double %f1, double %f2) #0 {
311 ; CHECK-LABEL: fmul_f64:
313 ; CHECK-NEXT: xsmuldp f1, f1, f2
316 ; NOVSX-LABEL: fmul_f64:
318 ; NOVSX-NEXT: fmul f1, f1, f2
321 ; SPE-LABEL: fmul_f64:
323 ; SPE-NEXT: evmergelo r5, r5, r6
324 ; SPE-NEXT: evmergelo r3, r3, r4
325 ; SPE-NEXT: efdmul r4, r3, r5
326 ; SPE-NEXT: evmergehi r3, r4, r4
329 %res = call double @llvm.experimental.constrained.fmul.f64(
330 double %f1, double %f2,
331 metadata !"round.dynamic",
332 metadata !"fpexcept.strict") #0
336 define <4 x float> @fmul_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
337 ; CHECK-LABEL: fmul_v4f32:
339 ; CHECK-NEXT: xvmulsp v2, v2, v3
342 ; NOVSX-LABEL: fmul_v4f32:
344 ; NOVSX-NEXT: addi r3, r1, -32
345 ; NOVSX-NEXT: stvx v3, 0, r3
346 ; NOVSX-NEXT: addi r3, r1, -48
347 ; NOVSX-NEXT: stvx v2, 0, r3
348 ; NOVSX-NEXT: addi r3, r1, -16
349 ; NOVSX-NEXT: lfs f0, -20(r1)
350 ; NOVSX-NEXT: lfs f1, -36(r1)
351 ; NOVSX-NEXT: fmuls f0, f1, f0
352 ; NOVSX-NEXT: lfs f1, -40(r1)
353 ; NOVSX-NEXT: stfs f0, -4(r1)
354 ; NOVSX-NEXT: lfs f0, -24(r1)
355 ; NOVSX-NEXT: fmuls f0, f1, f0
356 ; NOVSX-NEXT: lfs f1, -44(r1)
357 ; NOVSX-NEXT: stfs f0, -8(r1)
358 ; NOVSX-NEXT: lfs f0, -28(r1)
359 ; NOVSX-NEXT: fmuls f0, f1, f0
360 ; NOVSX-NEXT: lfs f1, -48(r1)
361 ; NOVSX-NEXT: stfs f0, -12(r1)
362 ; NOVSX-NEXT: lfs f0, -32(r1)
363 ; NOVSX-NEXT: fmuls f0, f1, f0
364 ; NOVSX-NEXT: stfs f0, -16(r1)
365 ; NOVSX-NEXT: lvx v2, 0, r3
368 ; SPE-LABEL: fmul_v4f32:
370 ; SPE-NEXT: efsmul r6, r6, r10
371 ; SPE-NEXT: efsmul r5, r5, r9
372 ; SPE-NEXT: efsmul r4, r4, r8
373 ; SPE-NEXT: efsmul r3, r3, r7
375 %res = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(
376 <4 x float> %vf1, <4 x float> %vf2,
377 metadata !"round.dynamic",
378 metadata !"fpexcept.strict") #0
379 ret <4 x float> %res;
382 define <2 x double> @fmul_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
383 ; CHECK-LABEL: fmul_v2f64:
385 ; CHECK-NEXT: xvmuldp v2, v2, v3
388 ; NOVSX-LABEL: fmul_v2f64:
390 ; NOVSX-NEXT: fmul f2, f2, f4
391 ; NOVSX-NEXT: fmul f1, f1, f3
394 ; SPE-LABEL: fmul_v2f64:
396 ; SPE-NEXT: evldd r4, 8(r1)
397 ; SPE-NEXT: evmergelo r7, r7, r8
398 ; SPE-NEXT: evmergelo r8, r9, r10
400 ; SPE-NEXT: evmergelo r5, r5, r6
401 ; SPE-NEXT: efdmul r4, r7, r4
402 ; SPE-NEXT: evstddx r4, r3, r9
403 ; SPE-NEXT: efdmul r4, r5, r8
404 ; SPE-NEXT: evstdd r4, 0(r3)
406 %res = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
407 <2 x double> %vf1, <2 x double> %vf2,
408 metadata !"round.dynamic",
409 metadata !"fpexcept.strict") #0
410 ret <2 x double> %res;
413 define float @fdiv_f32(float %f1, float %f2) #0 {
414 ; CHECK-LABEL: fdiv_f32:
416 ; CHECK-NEXT: xsdivsp f1, f1, f2
419 ; NOVSX-LABEL: fdiv_f32:
421 ; NOVSX-NEXT: fdivs f1, f1, f2
424 ; SPE-LABEL: fdiv_f32:
426 ; SPE-NEXT: efsdiv r3, r3, r4
429 %res = call float @llvm.experimental.constrained.fdiv.f32(
430 float %f1, float %f2,
431 metadata !"round.dynamic",
432 metadata !"fpexcept.strict") #0
436 define double @fdiv_f64(double %f1, double %f2) #0 {
437 ; CHECK-LABEL: fdiv_f64:
439 ; CHECK-NEXT: xsdivdp f1, f1, f2
442 ; NOVSX-LABEL: fdiv_f64:
444 ; NOVSX-NEXT: fdiv f1, f1, f2
447 ; SPE-LABEL: fdiv_f64:
449 ; SPE-NEXT: evmergelo r5, r5, r6
450 ; SPE-NEXT: evmergelo r3, r3, r4
451 ; SPE-NEXT: efddiv r4, r3, r5
452 ; SPE-NEXT: evmergehi r3, r4, r4
455 %res = call double @llvm.experimental.constrained.fdiv.f64(
456 double %f1, double %f2,
457 metadata !"round.dynamic",
458 metadata !"fpexcept.strict") #0
462 define <4 x float> @fdiv_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
463 ; CHECK-LABEL: fdiv_v4f32:
465 ; CHECK-NEXT: xvdivsp v2, v2, v3
468 ; NOVSX-LABEL: fdiv_v4f32:
470 ; NOVSX-NEXT: addi r3, r1, -32
471 ; NOVSX-NEXT: stvx v3, 0, r3
472 ; NOVSX-NEXT: addi r3, r1, -48
473 ; NOVSX-NEXT: stvx v2, 0, r3
474 ; NOVSX-NEXT: addi r3, r1, -16
475 ; NOVSX-NEXT: lfs f0, -20(r1)
476 ; NOVSX-NEXT: lfs f1, -36(r1)
477 ; NOVSX-NEXT: fdivs f0, f1, f0
478 ; NOVSX-NEXT: lfs f1, -40(r1)
479 ; NOVSX-NEXT: stfs f0, -4(r1)
480 ; NOVSX-NEXT: lfs f0, -24(r1)
481 ; NOVSX-NEXT: fdivs f0, f1, f0
482 ; NOVSX-NEXT: lfs f1, -44(r1)
483 ; NOVSX-NEXT: stfs f0, -8(r1)
484 ; NOVSX-NEXT: lfs f0, -28(r1)
485 ; NOVSX-NEXT: fdivs f0, f1, f0
486 ; NOVSX-NEXT: lfs f1, -48(r1)
487 ; NOVSX-NEXT: stfs f0, -12(r1)
488 ; NOVSX-NEXT: lfs f0, -32(r1)
489 ; NOVSX-NEXT: fdivs f0, f1, f0
490 ; NOVSX-NEXT: stfs f0, -16(r1)
491 ; NOVSX-NEXT: lvx v2, 0, r3
494 ; SPE-LABEL: fdiv_v4f32:
496 ; SPE-NEXT: efsdiv r6, r6, r10
497 ; SPE-NEXT: efsdiv r5, r5, r9
498 ; SPE-NEXT: efsdiv r4, r4, r8
499 ; SPE-NEXT: efsdiv r3, r3, r7
501 %res = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(
502 <4 x float> %vf1, <4 x float> %vf2,
503 metadata !"round.dynamic",
504 metadata !"fpexcept.strict") #0
508 define <2 x double> @fdiv_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
509 ; CHECK-LABEL: fdiv_v2f64:
511 ; CHECK-NEXT: xvdivdp v2, v2, v3
514 ; NOVSX-LABEL: fdiv_v2f64:
516 ; NOVSX-NEXT: fdiv f2, f2, f4
517 ; NOVSX-NEXT: fdiv f1, f1, f3
520 ; SPE-LABEL: fdiv_v2f64:
522 ; SPE-NEXT: evldd r4, 8(r1)
523 ; SPE-NEXT: evmergelo r7, r7, r8
524 ; SPE-NEXT: evmergelo r8, r9, r10
525 ; SPE-NEXT: evmergelo r5, r5, r6
526 ; SPE-NEXT: efddiv r4, r7, r4
528 ; SPE-NEXT: evstddx r4, r3, r7
529 ; SPE-NEXT: efddiv r4, r5, r8
530 ; SPE-NEXT: evstdd r4, 0(r3)
532 %res = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
533 <2 x double> %vf1, <2 x double> %vf2,
534 metadata !"round.dynamic",
535 metadata !"fpexcept.strict") #0
536 ret <2 x double> %res
539 define double @no_fma_fold(double %f1, double %f2, double %f3) #0 {
540 ; CHECK-LABEL: no_fma_fold:
542 ; CHECK-NEXT: xsmuldp f0, f1, f2
543 ; CHECK-NEXT: xsadddp f1, f0, f3
546 ; NOVSX-LABEL: no_fma_fold:
548 ; NOVSX-NEXT: fmul f0, f1, f2
549 ; NOVSX-NEXT: fadd f1, f0, f3
552 ; SPE-LABEL: no_fma_fold:
554 ; SPE-NEXT: evmergelo r7, r7, r8
555 ; SPE-NEXT: evmergelo r5, r5, r6
556 ; SPE-NEXT: evmergelo r3, r3, r4
557 ; SPE-NEXT: efdmul r3, r3, r5
558 ; SPE-NEXT: efdadd r4, r3, r7
559 ; SPE-NEXT: evmergehi r3, r4, r4
561 %mul = call double @llvm.experimental.constrained.fmul.f64(
562 double %f1, double %f2,
563 metadata !"round.dynamic",
564 metadata !"fpexcept.strict") #0
565 %add = call double @llvm.experimental.constrained.fadd.f64(
566 double %mul, double %f3,
567 metadata !"round.dynamic",
568 metadata !"fpexcept.strict") #0
572 define float @fmadd_f32(float %f0, float %f1, float %f2) #0 {
573 ; CHECK-LABEL: fmadd_f32:
575 ; CHECK-NEXT: xsmaddasp f3, f1, f2
576 ; CHECK-NEXT: fmr f1, f3
579 ; NOVSX-LABEL: fmadd_f32:
581 ; NOVSX-NEXT: fmadds f1, f1, f2, f3
584 ; SPE-LABEL: fmadd_f32:
587 ; SPE-NEXT: stwu r1, -16(r1)
588 ; SPE-NEXT: stw r0, 20(r1)
589 ; SPE-NEXT: .cfi_def_cfa_offset 16
590 ; SPE-NEXT: .cfi_offset lr, 4
592 ; SPE-NEXT: lwz r0, 20(r1)
593 ; SPE-NEXT: addi r1, r1, 16
596 %res = call float @llvm.experimental.constrained.fma.f32(
597 float %f0, float %f1, float %f2,
598 metadata !"round.dynamic",
599 metadata !"fpexcept.strict") #0
603 define double @fmadd_f64(double %f0, double %f1, double %f2) #0 {
604 ; CHECK-LABEL: fmadd_f64:
606 ; CHECK-NEXT: xsmaddadp f3, f1, f2
607 ; CHECK-NEXT: fmr f1, f3
610 ; NOVSX-LABEL: fmadd_f64:
612 ; NOVSX-NEXT: fmadd f1, f1, f2, f3
615 ; SPE-LABEL: fmadd_f64:
618 ; SPE-NEXT: stwu r1, -16(r1)
619 ; SPE-NEXT: stw r0, 20(r1)
620 ; SPE-NEXT: .cfi_def_cfa_offset 16
621 ; SPE-NEXT: .cfi_offset lr, 4
622 ; SPE-NEXT: evmergelo r8, r7, r8
623 ; SPE-NEXT: evmergelo r6, r5, r6
624 ; SPE-NEXT: evmergelo r4, r3, r4
625 ; SPE-NEXT: evmergehi r3, r4, r4
626 ; SPE-NEXT: evmergehi r5, r6, r6
627 ; SPE-NEXT: evmergehi r7, r8, r8
629 ; SPE-NEXT: evmergelo r4, r3, r4
630 ; SPE-NEXT: evmergehi r3, r4, r4
631 ; SPE-NEXT: lwz r0, 20(r1)
632 ; SPE-NEXT: addi r1, r1, 16
635 %res = call double @llvm.experimental.constrained.fma.f64(
636 double %f0, double %f1, double %f2,
637 metadata !"round.dynamic",
638 metadata !"fpexcept.strict") #0
642 define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
643 ; CHECK-LABEL: fmadd_v4f32:
645 ; CHECK-NEXT: xvmaddasp v4, v2, v3
646 ; CHECK-NEXT: vmr v2, v4
649 ; NOVSX-LABEL: fmadd_v4f32:
651 ; NOVSX-NEXT: addi r3, r1, -32
652 ; NOVSX-NEXT: stvx v4, 0, r3
653 ; NOVSX-NEXT: addi r3, r1, -48
654 ; NOVSX-NEXT: stvx v3, 0, r3
655 ; NOVSX-NEXT: addi r3, r1, -64
656 ; NOVSX-NEXT: stvx v2, 0, r3
657 ; NOVSX-NEXT: addi r3, r1, -16
658 ; NOVSX-NEXT: lfs f0, -20(r1)
659 ; NOVSX-NEXT: lfs f1, -36(r1)
660 ; NOVSX-NEXT: lfs f2, -52(r1)
661 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
662 ; NOVSX-NEXT: lfs f1, -40(r1)
663 ; NOVSX-NEXT: lfs f2, -56(r1)
664 ; NOVSX-NEXT: stfs f0, -4(r1)
665 ; NOVSX-NEXT: lfs f0, -24(r1)
666 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
667 ; NOVSX-NEXT: lfs f1, -44(r1)
668 ; NOVSX-NEXT: lfs f2, -60(r1)
669 ; NOVSX-NEXT: stfs f0, -8(r1)
670 ; NOVSX-NEXT: lfs f0, -28(r1)
671 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
672 ; NOVSX-NEXT: lfs f1, -48(r1)
673 ; NOVSX-NEXT: lfs f2, -64(r1)
674 ; NOVSX-NEXT: stfs f0, -12(r1)
675 ; NOVSX-NEXT: lfs f0, -32(r1)
676 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
677 ; NOVSX-NEXT: stfs f0, -16(r1)
678 ; NOVSX-NEXT: lvx v2, 0, r3
681 ; SPE-LABEL: fmadd_v4f32:
684 ; SPE-NEXT: stwu r1, -64(r1)
685 ; SPE-NEXT: stw r0, 68(r1)
686 ; SPE-NEXT: .cfi_def_cfa_offset 64
687 ; SPE-NEXT: .cfi_offset lr, 4
688 ; SPE-NEXT: .cfi_offset r21, -44
689 ; SPE-NEXT: .cfi_offset r22, -40
690 ; SPE-NEXT: .cfi_offset r23, -36
691 ; SPE-NEXT: .cfi_offset r24, -32
692 ; SPE-NEXT: .cfi_offset r25, -28
693 ; SPE-NEXT: .cfi_offset r26, -24
694 ; SPE-NEXT: .cfi_offset r27, -20
695 ; SPE-NEXT: .cfi_offset r28, -16
696 ; SPE-NEXT: .cfi_offset r29, -12
697 ; SPE-NEXT: .cfi_offset r30, -8
698 ; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill
699 ; SPE-NEXT: mr r27, r5
700 ; SPE-NEXT: lwz r5, 84(r1)
701 ; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill
702 ; SPE-NEXT: mr r25, r3
703 ; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill
704 ; SPE-NEXT: mr r26, r4
705 ; SPE-NEXT: mr r3, r6
706 ; SPE-NEXT: mr r4, r10
707 ; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill
708 ; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill
709 ; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill
710 ; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill
711 ; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill
712 ; SPE-NEXT: mr r28, r7
713 ; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill
714 ; SPE-NEXT: mr r29, r8
715 ; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill
716 ; SPE-NEXT: mr r30, r9
717 ; SPE-NEXT: lwz r24, 72(r1)
718 ; SPE-NEXT: lwz r23, 76(r1)
719 ; SPE-NEXT: lwz r22, 80(r1)
721 ; SPE-NEXT: mr r21, r3
722 ; SPE-NEXT: mr r3, r27
723 ; SPE-NEXT: mr r4, r30
724 ; SPE-NEXT: mr r5, r22
726 ; SPE-NEXT: mr r30, r3
727 ; SPE-NEXT: mr r3, r26
728 ; SPE-NEXT: mr r4, r29
729 ; SPE-NEXT: mr r5, r23
731 ; SPE-NEXT: mr r29, r3
732 ; SPE-NEXT: mr r3, r25
733 ; SPE-NEXT: mr r4, r28
734 ; SPE-NEXT: mr r5, r24
736 ; SPE-NEXT: mr r4, r29
737 ; SPE-NEXT: mr r5, r30
738 ; SPE-NEXT: mr r6, r21
739 ; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload
740 ; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload
741 ; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload
742 ; SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload
743 ; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload
744 ; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload
745 ; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload
746 ; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload
747 ; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload
748 ; SPE-NEXT: lwz r21, 20(r1) # 4-byte Folded Reload
749 ; SPE-NEXT: lwz r0, 68(r1)
750 ; SPE-NEXT: addi r1, r1, 64
753 %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
754 <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
755 metadata !"round.dynamic",
756 metadata !"fpexcept.strict") #0
760 define <2 x double> @fmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
761 ; CHECK-LABEL: fmadd_v2f64:
763 ; CHECK-NEXT: xvmaddadp v4, v2, v3
764 ; CHECK-NEXT: vmr v2, v4
767 ; NOVSX-LABEL: fmadd_v2f64:
769 ; NOVSX-NEXT: fmadd f2, f2, f4, f6
770 ; NOVSX-NEXT: fmadd f1, f1, f3, f5
773 ; SPE-LABEL: fmadd_v2f64:
776 ; SPE-NEXT: stwu r1, -80(r1)
777 ; SPE-NEXT: stw r0, 84(r1)
778 ; SPE-NEXT: .cfi_def_cfa_offset 80
779 ; SPE-NEXT: .cfi_offset lr, 4
780 ; SPE-NEXT: .cfi_offset r26, -64
781 ; SPE-NEXT: .cfi_offset r27, -56
782 ; SPE-NEXT: .cfi_offset r28, -48
783 ; SPE-NEXT: .cfi_offset r29, -40
784 ; SPE-NEXT: .cfi_offset r30, -8
785 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
786 ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
787 ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
788 ; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
789 ; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
790 ; SPE-NEXT: evmergelo r27, r7, r8
791 ; SPE-NEXT: evmergelo r9, r9, r10
792 ; SPE-NEXT: evmergelo r4, r5, r6
793 ; SPE-NEXT: mr r30, r3
794 ; SPE-NEXT: evldd r8, 96(r1)
795 ; SPE-NEXT: evmergehi r3, r4, r4
796 ; SPE-NEXT: evmergehi r5, r9, r9
797 ; SPE-NEXT: mr r6, r9
798 ; SPE-NEXT: evldd r29, 104(r1)
799 ; SPE-NEXT: evmergehi r7, r8, r8
800 ; SPE-NEXT: evldd r28, 88(r1)
802 ; SPE-NEXT: evmergelo r26, r3, r4
803 ; SPE-NEXT: evmergehi r3, r27, r27
804 ; SPE-NEXT: evmergehi r5, r28, r28
805 ; SPE-NEXT: evmergehi r7, r29, r29
806 ; SPE-NEXT: mr r4, r27
807 ; SPE-NEXT: mr r6, r28
808 ; SPE-NEXT: mr r8, r29
811 ; SPE-NEXT: evmergelo r3, r3, r4
812 ; SPE-NEXT: evstddx r3, r30, r5
813 ; SPE-NEXT: evstdd r26, 0(r30)
814 ; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
815 ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
816 ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
817 ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
818 ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
819 ; SPE-NEXT: lwz r0, 84(r1)
820 ; SPE-NEXT: addi r1, r1, 80
823 %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
824 <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
825 metadata !"round.dynamic",
826 metadata !"fpexcept.strict") #0
827 ret <2 x double> %res
830 define float @fmsub_f32(float %f0, float %f1, float %f2) #0 {
831 ; CHECK-LABEL: fmsub_f32:
833 ; CHECK-NEXT: xsmsubasp f3, f1, f2
834 ; CHECK-NEXT: fmr f1, f3
837 ; NOVSX-LABEL: fmsub_f32:
839 ; NOVSX-NEXT: fmsubs f1, f1, f2, f3
842 ; SPE-LABEL: fmsub_f32:
845 ; SPE-NEXT: stwu r1, -16(r1)
846 ; SPE-NEXT: stw r0, 20(r1)
847 ; SPE-NEXT: .cfi_def_cfa_offset 16
848 ; SPE-NEXT: .cfi_offset lr, 4
849 ; SPE-NEXT: efsneg r5, r5
851 ; SPE-NEXT: lwz r0, 20(r1)
852 ; SPE-NEXT: addi r1, r1, 16
855 %neg = fneg float %f2
856 %res = call float @llvm.experimental.constrained.fma.f32(
857 float %f0, float %f1, float %neg,
858 metadata !"round.dynamic",
859 metadata !"fpexcept.strict") #0
863 define double @fmsub_f64(double %f0, double %f1, double %f2) #0 {
864 ; CHECK-LABEL: fmsub_f64:
866 ; CHECK-NEXT: xsmsubadp f3, f1, f2
867 ; CHECK-NEXT: fmr f1, f3
870 ; NOVSX-LABEL: fmsub_f64:
872 ; NOVSX-NEXT: fmsub f1, f1, f2, f3
875 ; SPE-LABEL: fmsub_f64:
878 ; SPE-NEXT: stwu r1, -16(r1)
879 ; SPE-NEXT: stw r0, 20(r1)
880 ; SPE-NEXT: .cfi_def_cfa_offset 16
881 ; SPE-NEXT: .cfi_offset lr, 4
882 ; SPE-NEXT: evmergelo r6, r5, r6
883 ; SPE-NEXT: evmergelo r4, r3, r4
884 ; SPE-NEXT: evmergelo r3, r7, r8
885 ; SPE-NEXT: efdneg r8, r3
886 ; SPE-NEXT: evmergehi r3, r4, r4
887 ; SPE-NEXT: evmergehi r5, r6, r6
888 ; SPE-NEXT: evmergehi r7, r8, r8
890 ; SPE-NEXT: evmergelo r4, r3, r4
891 ; SPE-NEXT: evmergehi r3, r4, r4
892 ; SPE-NEXT: lwz r0, 20(r1)
893 ; SPE-NEXT: addi r1, r1, 16
896 %neg = fneg double %f2
897 %res = call double @llvm.experimental.constrained.fma.f64(
898 double %f0, double %f1, double %neg,
899 metadata !"round.dynamic",
900 metadata !"fpexcept.strict") #0
904 define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
905 ; CHECK-LABEL: fmsub_v4f32:
907 ; CHECK-NEXT: xvmsubasp v4, v2, v3
908 ; CHECK-NEXT: vmr v2, v4
911 ; NOVSX-LABEL: fmsub_v4f32:
913 ; NOVSX-NEXT: vspltisb v5, -1
914 ; NOVSX-NEXT: addi r3, r1, -48
915 ; NOVSX-NEXT: vslw v5, v5, v5
916 ; NOVSX-NEXT: stvx v3, 0, r3
917 ; NOVSX-NEXT: addi r3, r1, -64
918 ; NOVSX-NEXT: vxor v4, v4, v5
919 ; NOVSX-NEXT: stvx v2, 0, r3
920 ; NOVSX-NEXT: addi r3, r1, -32
921 ; NOVSX-NEXT: stvx v4, 0, r3
922 ; NOVSX-NEXT: addi r3, r1, -16
923 ; NOVSX-NEXT: lfs f0, -36(r1)
924 ; NOVSX-NEXT: lfs f1, -52(r1)
925 ; NOVSX-NEXT: lfs f2, -20(r1)
926 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
927 ; NOVSX-NEXT: lfs f1, -56(r1)
928 ; NOVSX-NEXT: lfs f2, -24(r1)
929 ; NOVSX-NEXT: stfs f0, -4(r1)
930 ; NOVSX-NEXT: lfs f0, -40(r1)
931 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
932 ; NOVSX-NEXT: lfs f1, -60(r1)
933 ; NOVSX-NEXT: lfs f2, -28(r1)
934 ; NOVSX-NEXT: stfs f0, -8(r1)
935 ; NOVSX-NEXT: lfs f0, -44(r1)
936 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
937 ; NOVSX-NEXT: lfs f1, -64(r1)
938 ; NOVSX-NEXT: lfs f2, -32(r1)
939 ; NOVSX-NEXT: stfs f0, -12(r1)
940 ; NOVSX-NEXT: lfs f0, -48(r1)
941 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
942 ; NOVSX-NEXT: stfs f0, -16(r1)
943 ; NOVSX-NEXT: lvx v2, 0, r3
946 ; SPE-LABEL: fmsub_v4f32:
949 ; SPE-NEXT: stwu r1, -64(r1)
950 ; SPE-NEXT: stw r0, 68(r1)
951 ; SPE-NEXT: .cfi_def_cfa_offset 64
952 ; SPE-NEXT: .cfi_offset lr, 4
953 ; SPE-NEXT: .cfi_offset r21, -44
954 ; SPE-NEXT: .cfi_offset r22, -40
955 ; SPE-NEXT: .cfi_offset r23, -36
956 ; SPE-NEXT: .cfi_offset r24, -32
957 ; SPE-NEXT: .cfi_offset r25, -28
958 ; SPE-NEXT: .cfi_offset r26, -24
959 ; SPE-NEXT: .cfi_offset r27, -20
960 ; SPE-NEXT: .cfi_offset r28, -16
961 ; SPE-NEXT: .cfi_offset r29, -12
962 ; SPE-NEXT: .cfi_offset r30, -8
963 ; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill
964 ; SPE-NEXT: mr r25, r3
965 ; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill
966 ; SPE-NEXT: mr r26, r4
967 ; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill
968 ; SPE-NEXT: mr r27, r5
969 ; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill
970 ; SPE-NEXT: mr r28, r7
971 ; SPE-NEXT: lwz r3, 80(r1)
972 ; SPE-NEXT: lwz r4, 72(r1)
973 ; SPE-NEXT: lwz r5, 76(r1)
974 ; SPE-NEXT: lwz r7, 84(r1)
975 ; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill
976 ; SPE-NEXT: efsneg r22, r3
977 ; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill
978 ; SPE-NEXT: efsneg r23, r5
979 ; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill
980 ; SPE-NEXT: efsneg r24, r4
981 ; SPE-NEXT: efsneg r5, r7
982 ; SPE-NEXT: mr r3, r6
983 ; SPE-NEXT: mr r4, r10
984 ; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill
985 ; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill
986 ; SPE-NEXT: mr r29, r8
987 ; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill
988 ; SPE-NEXT: mr r30, r9
990 ; SPE-NEXT: mr r21, r3
991 ; SPE-NEXT: mr r3, r27
992 ; SPE-NEXT: mr r4, r30
993 ; SPE-NEXT: mr r5, r22
995 ; SPE-NEXT: mr r30, r3
996 ; SPE-NEXT: mr r3, r26
997 ; SPE-NEXT: mr r4, r29
998 ; SPE-NEXT: mr r5, r23
1000 ; SPE-NEXT: mr r29, r3
1001 ; SPE-NEXT: mr r3, r25
1002 ; SPE-NEXT: mr r4, r28
1003 ; SPE-NEXT: mr r5, r24
1005 ; SPE-NEXT: mr r4, r29
1006 ; SPE-NEXT: mr r5, r30
1007 ; SPE-NEXT: mr r6, r21
1008 ; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload
1009 ; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload
1010 ; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload
1011 ; SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload
1012 ; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload
1013 ; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload
1014 ; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload
1015 ; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload
1016 ; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload
1017 ; SPE-NEXT: lwz r21, 20(r1) # 4-byte Folded Reload
1018 ; SPE-NEXT: lwz r0, 68(r1)
1019 ; SPE-NEXT: addi r1, r1, 64
1022 %neg = fneg <4 x float> %vf2
1023 %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
1024 <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
1025 metadata !"round.dynamic",
1026 metadata !"fpexcept.strict") #0
1027 ret <4 x float> %res
1030 define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
1031 ; CHECK-LABEL: fmsub_v2f64:
1033 ; CHECK-NEXT: xvmsubadp v4, v2, v3
1034 ; CHECK-NEXT: vmr v2, v4
1037 ; NOVSX-LABEL: fmsub_v2f64:
1039 ; NOVSX-NEXT: fmsub f2, f2, f4, f6
1040 ; NOVSX-NEXT: fmsub f1, f1, f3, f5
1043 ; SPE-LABEL: fmsub_v2f64:
1046 ; SPE-NEXT: stwu r1, -80(r1)
1047 ; SPE-NEXT: stw r0, 84(r1)
1048 ; SPE-NEXT: .cfi_def_cfa_offset 80
1049 ; SPE-NEXT: .cfi_offset lr, 4
1050 ; SPE-NEXT: .cfi_offset r26, -64
1051 ; SPE-NEXT: .cfi_offset r27, -56
1052 ; SPE-NEXT: .cfi_offset r28, -48
1053 ; SPE-NEXT: .cfi_offset r29, -40
1054 ; SPE-NEXT: .cfi_offset r30, -8
1055 ; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
1056 ; SPE-NEXT: mr r30, r3
1057 ; SPE-NEXT: evldd r3, 96(r1)
1058 ; SPE-NEXT: evldd r11, 104(r1)
1059 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
1060 ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
1061 ; SPE-NEXT: efdneg r27, r11
1062 ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
1063 ; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
1064 ; SPE-NEXT: evmergelo r29, r7, r8
1065 ; SPE-NEXT: evmergelo r9, r9, r10
1066 ; SPE-NEXT: evmergelo r4, r5, r6
1067 ; SPE-NEXT: efdneg r8, r3
1068 ; SPE-NEXT: evmergehi r3, r4, r4
1069 ; SPE-NEXT: evmergehi r5, r9, r9
1070 ; SPE-NEXT: evmergehi r7, r8, r8
1071 ; SPE-NEXT: mr r6, r9
1072 ; SPE-NEXT: evldd r28, 88(r1)
1074 ; SPE-NEXT: evmergelo r26, r3, r4
1075 ; SPE-NEXT: evmergehi r3, r29, r29
1076 ; SPE-NEXT: evmergehi r5, r28, r28
1077 ; SPE-NEXT: evmergehi r7, r27, r27
1078 ; SPE-NEXT: mr r4, r29
1079 ; SPE-NEXT: mr r6, r28
1080 ; SPE-NEXT: mr r8, r27
1082 ; SPE-NEXT: li r5, 8
1083 ; SPE-NEXT: evmergelo r3, r3, r4
1084 ; SPE-NEXT: evstddx r3, r30, r5
1085 ; SPE-NEXT: evstdd r26, 0(r30)
1086 ; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
1087 ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
1088 ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
1089 ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
1090 ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
1091 ; SPE-NEXT: lwz r0, 84(r1)
1092 ; SPE-NEXT: addi r1, r1, 80
1095 %neg = fneg <2 x double> %vf2
1096 %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
1097 <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
1098 metadata !"round.dynamic",
1099 metadata !"fpexcept.strict") #0
1100 ret <2 x double> %res
; fnmadd_f32: constrained (strict-fp, dynamic rounding) fma of f0*f1+f2, then
; fneg of the result. VSX folds this to xsnmsub-family fused op (xsnmaddasp);
; no-VSX uses fnmadds. SPE has no FMA: it negates the result with efsneg after
; a libcall. NOTE(review): this listing elides some lines (e.g. the call and
; return instructions) — regenerate with update_llc_test_checks.py.
1103 define float @fnmadd_f32(float %f0, float %f1, float %f2) #0 {
1104 ; CHECK-LABEL: fnmadd_f32:
1106 ; CHECK-NEXT: xsnmaddasp f3, f1, f2
1107 ; CHECK-NEXT: fmr f1, f3
1110 ; NOVSX-LABEL: fnmadd_f32:
1112 ; NOVSX-NEXT: fnmadds f1, f1, f2, f3
1115 ; SPE-LABEL: fnmadd_f32:
1118 ; SPE-NEXT: stwu r1, -16(r1)
1119 ; SPE-NEXT: stw r0, 20(r1)
1120 ; SPE-NEXT: .cfi_def_cfa_offset 16
1121 ; SPE-NEXT: .cfi_offset lr, 4
1123 ; SPE-NEXT: efsneg r3, r3
1124 ; SPE-NEXT: lwz r0, 20(r1)
1125 ; SPE-NEXT: addi r1, r1, 16
1128 %fma = call float @llvm.experimental.constrained.fma.f32(
1129 float %f0, float %f1, float %f2,
1130 metadata !"round.dynamic",
1131 metadata !"fpexcept.strict") #0
1132 %res = fneg float %fma
; fnmadd_f64: double-precision variant of fnmadd — constrained fma followed by
; fneg. VSX: xsnmaddadp; no-VSX: fnmadd. SPE packs the 32-bit GPR pairs into
; 64-bit evmerge values, negates the fma result with efdneg, and splits it back
; into r3/r4 for the soft-double return. NOTE(review): some interior lines
; (call/return) are elided in this listing.
1136 define double @fnmadd_f64(double %f0, double %f1, double %f2) #0 {
1137 ; CHECK-LABEL: fnmadd_f64:
1139 ; CHECK-NEXT: xsnmaddadp f3, f1, f2
1140 ; CHECK-NEXT: fmr f1, f3
1143 ; NOVSX-LABEL: fnmadd_f64:
1145 ; NOVSX-NEXT: fnmadd f1, f1, f2, f3
1148 ; SPE-LABEL: fnmadd_f64:
1151 ; SPE-NEXT: stwu r1, -16(r1)
1152 ; SPE-NEXT: stw r0, 20(r1)
1153 ; SPE-NEXT: .cfi_def_cfa_offset 16
1154 ; SPE-NEXT: .cfi_offset lr, 4
1155 ; SPE-NEXT: evmergelo r8, r7, r8
1156 ; SPE-NEXT: evmergelo r6, r5, r6
1157 ; SPE-NEXT: evmergelo r4, r3, r4
1158 ; SPE-NEXT: evmergehi r3, r4, r4
1159 ; SPE-NEXT: evmergehi r5, r6, r6
1160 ; SPE-NEXT: evmergehi r7, r8, r8
1162 ; SPE-NEXT: evmergelo r3, r3, r4
1163 ; SPE-NEXT: efdneg r4, r3
1164 ; SPE-NEXT: evmergehi r3, r4, r4
1165 ; SPE-NEXT: lwz r0, 20(r1)
1166 ; SPE-NEXT: addi r1, r1, 16
1169 %fma = call double @llvm.experimental.constrained.fma.f64(
1170 double %f0, double %f1, double %f2,
1171 metadata !"round.dynamic",
1172 metadata !"fpexcept.strict") #0
1173 %res = fneg double %fma
; fnmadd_v4f32: vector (4 x f32) constrained fma followed by fneg. VSX keeps it
; vectorized (xvmaddasp + xvnegsp). No-VSX scalarizes through the stack with
; four fmadds and flips signs via vxor against a sign-bit mask built with
; vspltisb/vslw. SPE scalarizes into four libcalls, spilling r21-r30 to hold
; the lanes, then negates each lane with efsneg. NOTE(review): the bl call
; lines between the mr shuffles are elided in this listing.
1177 define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
1178 ; CHECK-LABEL: fnmadd_v4f32:
1180 ; CHECK-NEXT: xvmaddasp v4, v2, v3
1181 ; CHECK-NEXT: xvnegsp v2, v4
1184 ; NOVSX-LABEL: fnmadd_v4f32:
1186 ; NOVSX-NEXT: addi r3, r1, -32
1187 ; NOVSX-NEXT: vspltisb v5, -1
1188 ; NOVSX-NEXT: stvx v4, 0, r3
1189 ; NOVSX-NEXT: addi r3, r1, -48
1190 ; NOVSX-NEXT: stvx v3, 0, r3
1191 ; NOVSX-NEXT: addi r3, r1, -64
1192 ; NOVSX-NEXT: vslw v3, v5, v5
1193 ; NOVSX-NEXT: stvx v2, 0, r3
1194 ; NOVSX-NEXT: addi r3, r1, -16
1195 ; NOVSX-NEXT: lfs f0, -20(r1)
1196 ; NOVSX-NEXT: lfs f1, -36(r1)
1197 ; NOVSX-NEXT: lfs f2, -52(r1)
1198 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
1199 ; NOVSX-NEXT: lfs f1, -40(r1)
1200 ; NOVSX-NEXT: lfs f2, -56(r1)
1201 ; NOVSX-NEXT: stfs f0, -4(r1)
1202 ; NOVSX-NEXT: lfs f0, -24(r1)
1203 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
1204 ; NOVSX-NEXT: lfs f1, -44(r1)
1205 ; NOVSX-NEXT: lfs f2, -60(r1)
1206 ; NOVSX-NEXT: stfs f0, -8(r1)
1207 ; NOVSX-NEXT: lfs f0, -28(r1)
1208 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
1209 ; NOVSX-NEXT: lfs f1, -48(r1)
1210 ; NOVSX-NEXT: lfs f2, -64(r1)
1211 ; NOVSX-NEXT: stfs f0, -12(r1)
1212 ; NOVSX-NEXT: lfs f0, -32(r1)
1213 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
1214 ; NOVSX-NEXT: stfs f0, -16(r1)
1215 ; NOVSX-NEXT: lvx v2, 0, r3
1216 ; NOVSX-NEXT: vxor v2, v2, v3
1219 ; SPE-LABEL: fnmadd_v4f32:
1222 ; SPE-NEXT: stwu r1, -64(r1)
1223 ; SPE-NEXT: stw r0, 68(r1)
1224 ; SPE-NEXT: .cfi_def_cfa_offset 64
1225 ; SPE-NEXT: .cfi_offset lr, 4
1226 ; SPE-NEXT: .cfi_offset r21, -44
1227 ; SPE-NEXT: .cfi_offset r22, -40
1228 ; SPE-NEXT: .cfi_offset r23, -36
1229 ; SPE-NEXT: .cfi_offset r24, -32
1230 ; SPE-NEXT: .cfi_offset r25, -28
1231 ; SPE-NEXT: .cfi_offset r26, -24
1232 ; SPE-NEXT: .cfi_offset r27, -20
1233 ; SPE-NEXT: .cfi_offset r28, -16
1234 ; SPE-NEXT: .cfi_offset r29, -12
1235 ; SPE-NEXT: .cfi_offset r30, -8
1236 ; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill
1237 ; SPE-NEXT: mr r27, r5
1238 ; SPE-NEXT: lwz r5, 84(r1)
1239 ; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill
1240 ; SPE-NEXT: mr r25, r3
1241 ; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill
1242 ; SPE-NEXT: mr r26, r4
1243 ; SPE-NEXT: mr r3, r6
1244 ; SPE-NEXT: mr r4, r10
1245 ; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill
1246 ; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill
1247 ; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill
1248 ; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill
1249 ; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill
1250 ; SPE-NEXT: mr r28, r7
1251 ; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill
1252 ; SPE-NEXT: mr r29, r8
1253 ; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill
1254 ; SPE-NEXT: mr r30, r9
1255 ; SPE-NEXT: lwz r24, 72(r1)
1256 ; SPE-NEXT: lwz r23, 76(r1)
1257 ; SPE-NEXT: lwz r22, 80(r1)
1259 ; SPE-NEXT: mr r21, r3
1260 ; SPE-NEXT: mr r3, r27
1261 ; SPE-NEXT: mr r4, r30
1262 ; SPE-NEXT: mr r5, r22
1264 ; SPE-NEXT: mr r30, r3
1265 ; SPE-NEXT: mr r3, r26
1266 ; SPE-NEXT: mr r4, r29
1267 ; SPE-NEXT: mr r5, r23
1269 ; SPE-NEXT: mr r29, r3
1270 ; SPE-NEXT: mr r3, r25
1271 ; SPE-NEXT: mr r4, r28
1272 ; SPE-NEXT: mr r5, r24
1274 ; SPE-NEXT: efsneg r4, r29
1275 ; SPE-NEXT: efsneg r5, r30
1276 ; SPE-NEXT: efsneg r3, r3
1277 ; SPE-NEXT: efsneg r6, r21
1278 ; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload
1279 ; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload
1280 ; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload
1281 ; SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload
1282 ; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload
1283 ; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload
1284 ; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload
1285 ; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload
1286 ; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload
1287 ; SPE-NEXT: lwz r21, 20(r1) # 4-byte Folded Reload
1288 ; SPE-NEXT: lwz r0, 68(r1)
1289 ; SPE-NEXT: addi r1, r1, 64
1292 %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
1293 <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
1294 metadata !"round.dynamic",
1295 metadata !"fpexcept.strict") #0
1296 %res = fneg <4 x float> %fma
1297 ret <4 x float> %res
; fnmadd_v2f64: vector (2 x f64) constrained fma followed by fneg. VSX fuses to
; xvnmaddadp; no-VSX scalarizes into two fnmadd. SPE returns the vector via the
; sret pointer in r3 (saved in r30): each lane's fma result is negated with
; efdneg and stored with evstdd/evstddx at offsets 0 and 8. NOTE(review): bl
; call lines are elided in this listing.
1300 define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
1301 ; CHECK-LABEL: fnmadd_v2f64:
1303 ; CHECK-NEXT: xvnmaddadp v4, v2, v3
1304 ; CHECK-NEXT: vmr v2, v4
1307 ; NOVSX-LABEL: fnmadd_v2f64:
1309 ; NOVSX-NEXT: fnmadd f2, f2, f4, f6
1310 ; NOVSX-NEXT: fnmadd f1, f1, f3, f5
1313 ; SPE-LABEL: fnmadd_v2f64:
1316 ; SPE-NEXT: stwu r1, -80(r1)
1317 ; SPE-NEXT: stw r0, 84(r1)
1318 ; SPE-NEXT: .cfi_def_cfa_offset 80
1319 ; SPE-NEXT: .cfi_offset lr, 4
1320 ; SPE-NEXT: .cfi_offset r26, -64
1321 ; SPE-NEXT: .cfi_offset r27, -56
1322 ; SPE-NEXT: .cfi_offset r28, -48
1323 ; SPE-NEXT: .cfi_offset r29, -40
1324 ; SPE-NEXT: .cfi_offset r30, -8
1325 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
1326 ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
1327 ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
1328 ; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
1329 ; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
1330 ; SPE-NEXT: evmergelo r27, r7, r8
1331 ; SPE-NEXT: evmergelo r9, r9, r10
1332 ; SPE-NEXT: evmergelo r4, r5, r6
1333 ; SPE-NEXT: mr r30, r3
1334 ; SPE-NEXT: evldd r8, 96(r1)
1335 ; SPE-NEXT: evmergehi r3, r4, r4
1336 ; SPE-NEXT: evmergehi r5, r9, r9
1337 ; SPE-NEXT: mr r6, r9
1338 ; SPE-NEXT: evldd r29, 104(r1)
1339 ; SPE-NEXT: evmergehi r7, r8, r8
1340 ; SPE-NEXT: evldd r28, 88(r1)
1342 ; SPE-NEXT: evmergelo r26, r3, r4
1343 ; SPE-NEXT: evmergehi r3, r27, r27
1344 ; SPE-NEXT: evmergehi r5, r28, r28
1345 ; SPE-NEXT: evmergehi r7, r29, r29
1346 ; SPE-NEXT: mr r4, r27
1347 ; SPE-NEXT: mr r6, r28
1348 ; SPE-NEXT: mr r8, r29
1350 ; SPE-NEXT: evmergelo r3, r3, r4
1351 ; SPE-NEXT: li r5, 8
1352 ; SPE-NEXT: efdneg r3, r3
1353 ; SPE-NEXT: evstddx r3, r30, r5
1354 ; SPE-NEXT: efdneg r3, r26
1355 ; SPE-NEXT: evstdd r3, 0(r30)
1356 ; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
1357 ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
1358 ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
1359 ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
1360 ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
1361 ; SPE-NEXT: lwz r0, 84(r1)
1362 ; SPE-NEXT: addi r1, r1, 80
1365 %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
1366 <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
1367 metadata !"round.dynamic",
1368 metadata !"fpexcept.strict") #0
1369 %res = fneg <2 x double> %fma
1370 ret <2 x double> %res
; fnmsub_f32: fneg of the addend, constrained fma, then fneg of the result —
; i.e. -(f0*f1 - f2). VSX folds both negations into xsnmsubasp; no-VSX uses
; fnmsubs. SPE negates the addend (efsneg r5) before the libcall and the
; result (efsneg r3) after it. NOTE(review): the call/return lines are elided
; in this listing.
1373 define float @fnmsub_f32(float %f0, float %f1, float %f2) #0 {
1374 ; CHECK-LABEL: fnmsub_f32:
1376 ; CHECK-NEXT: xsnmsubasp f3, f1, f2
1377 ; CHECK-NEXT: fmr f1, f3
1380 ; NOVSX-LABEL: fnmsub_f32:
1382 ; NOVSX-NEXT: fnmsubs f1, f1, f2, f3
1385 ; SPE-LABEL: fnmsub_f32:
1388 ; SPE-NEXT: stwu r1, -16(r1)
1389 ; SPE-NEXT: stw r0, 20(r1)
1390 ; SPE-NEXT: .cfi_def_cfa_offset 16
1391 ; SPE-NEXT: .cfi_offset lr, 4
1392 ; SPE-NEXT: efsneg r5, r5
1394 ; SPE-NEXT: efsneg r3, r3
1395 ; SPE-NEXT: lwz r0, 20(r1)
1396 ; SPE-NEXT: addi r1, r1, 16
1399 %neg = fneg float %f2
1400 %fma = call float @llvm.experimental.constrained.fma.f32(
1401 float %f0, float %f1, float %neg,
1402 metadata !"round.dynamic",
1403 metadata !"fpexcept.strict") #0
1404 %res = fneg float %fma
; fnmsub_f64: double-precision -(f0*f1 - f2). VSX: xsnmsubadp; no-VSX: fnmsub.
; SPE merges the GPR pairs, negates the addend (efdneg r8) before the libcall
; and the result (efdneg r4) after, then splits back to r3/r4 for the
; soft-double return. NOTE(review): call/return lines are elided in this
; listing.
1408 define double @fnmsub_f64(double %f0, double %f1, double %f2) #0 {
1409 ; CHECK-LABEL: fnmsub_f64:
1411 ; CHECK-NEXT: xsnmsubadp f3, f1, f2
1412 ; CHECK-NEXT: fmr f1, f3
1415 ; NOVSX-LABEL: fnmsub_f64:
1417 ; NOVSX-NEXT: fnmsub f1, f1, f2, f3
1420 ; SPE-LABEL: fnmsub_f64:
1423 ; SPE-NEXT: stwu r1, -16(r1)
1424 ; SPE-NEXT: stw r0, 20(r1)
1425 ; SPE-NEXT: .cfi_def_cfa_offset 16
1426 ; SPE-NEXT: .cfi_offset lr, 4
1427 ; SPE-NEXT: evmergelo r6, r5, r6
1428 ; SPE-NEXT: evmergelo r4, r3, r4
1429 ; SPE-NEXT: evmergelo r3, r7, r8
1430 ; SPE-NEXT: efdneg r8, r3
1431 ; SPE-NEXT: evmergehi r3, r4, r4
1432 ; SPE-NEXT: evmergehi r5, r6, r6
1433 ; SPE-NEXT: evmergehi r7, r8, r8
1435 ; SPE-NEXT: evmergelo r3, r3, r4
1436 ; SPE-NEXT: efdneg r4, r3
1437 ; SPE-NEXT: evmergehi r3, r4, r4
1438 ; SPE-NEXT: lwz r0, 20(r1)
1439 ; SPE-NEXT: addi r1, r1, 16
1442 %neg = fneg double %f2
1443 %fma = call double @llvm.experimental.constrained.fma.f64(
1444 double %f0, double %f1, double %neg,
1445 metadata !"round.dynamic",
1446 metadata !"fpexcept.strict") #0
1447 %res = fneg double %fma
; fnmsub_v4f32: vector (4 x f32) -(vf0*vf1 - vf2). VSX fuses to xvnmsubasp.
; No-VSX negates vf2 up front via vxor with a sign-bit mask, scalarizes four
; fmadds through the stack, and negates the reassembled vector with vxor.
; SPE negates each incoming addend lane with efsneg, runs four scalarized
; libcalls (r21-r30 carry the lanes), then negates each result lane.
; NOTE(review): bl call lines are elided in this listing.
1452 ; CHECK-LABEL: fnmsub_v4f32:
1454 ; CHECK-NEXT: xvnmsubasp v4, v2, v3
1455 ; CHECK-NEXT: vmr v2, v4
1458 ; NOVSX-LABEL: fnmsub_v4f32:
1460 ; NOVSX-NEXT: vspltisb v5, -1
1461 ; NOVSX-NEXT: addi r3, r1, -48
1462 ; NOVSX-NEXT: vslw v5, v5, v5
1463 ; NOVSX-NEXT: stvx v3, 0, r3
1464 ; NOVSX-NEXT: addi r3, r1, -64
1465 ; NOVSX-NEXT: vxor v4, v4, v5
1466 ; NOVSX-NEXT: stvx v2, 0, r3
1467 ; NOVSX-NEXT: addi r3, r1, -32
1468 ; NOVSX-NEXT: stvx v4, 0, r3
1469 ; NOVSX-NEXT: addi r3, r1, -16
1470 ; NOVSX-NEXT: lfs f0, -36(r1)
1471 ; NOVSX-NEXT: lfs f1, -52(r1)
1472 ; NOVSX-NEXT: lfs f2, -20(r1)
1473 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
1474 ; NOVSX-NEXT: lfs f1, -56(r1)
1475 ; NOVSX-NEXT: lfs f2, -24(r1)
1476 ; NOVSX-NEXT: stfs f0, -4(r1)
1477 ; NOVSX-NEXT: lfs f0, -40(r1)
1478 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
1479 ; NOVSX-NEXT: lfs f1, -60(r1)
1480 ; NOVSX-NEXT: lfs f2, -28(r1)
1481 ; NOVSX-NEXT: stfs f0, -8(r1)
1482 ; NOVSX-NEXT: lfs f0, -44(r1)
1483 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
1484 ; NOVSX-NEXT: lfs f1, -64(r1)
1485 ; NOVSX-NEXT: lfs f2, -32(r1)
1486 ; NOVSX-NEXT: stfs f0, -12(r1)
1487 ; NOVSX-NEXT: lfs f0, -48(r1)
1488 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
1489 ; NOVSX-NEXT: stfs f0, -16(r1)
1490 ; NOVSX-NEXT: lvx v2, 0, r3
1491 ; NOVSX-NEXT: vxor v2, v2, v5
1494 ; SPE-LABEL: fnmsub_v4f32:
1497 ; SPE-NEXT: stwu r1, -64(r1)
1498 ; SPE-NEXT: stw r0, 68(r1)
1499 ; SPE-NEXT: .cfi_def_cfa_offset 64
1500 ; SPE-NEXT: .cfi_offset lr, 4
1501 ; SPE-NEXT: .cfi_offset r21, -44
1502 ; SPE-NEXT: .cfi_offset r22, -40
1503 ; SPE-NEXT: .cfi_offset r23, -36
1504 ; SPE-NEXT: .cfi_offset r24, -32
1505 ; SPE-NEXT: .cfi_offset r25, -28
1506 ; SPE-NEXT: .cfi_offset r26, -24
1507 ; SPE-NEXT: .cfi_offset r27, -20
1508 ; SPE-NEXT: .cfi_offset r28, -16
1509 ; SPE-NEXT: .cfi_offset r29, -12
1510 ; SPE-NEXT: .cfi_offset r30, -8
1511 ; SPE-NEXT: stw r25, 36(r1) # 4-byte Folded Spill
1512 ; SPE-NEXT: mr r25, r3
1513 ; SPE-NEXT: stw r26, 40(r1) # 4-byte Folded Spill
1514 ; SPE-NEXT: mr r26, r4
1515 ; SPE-NEXT: stw r27, 44(r1) # 4-byte Folded Spill
1516 ; SPE-NEXT: mr r27, r5
1517 ; SPE-NEXT: stw r28, 48(r1) # 4-byte Folded Spill
1518 ; SPE-NEXT: mr r28, r7
1519 ; SPE-NEXT: lwz r3, 80(r1)
1520 ; SPE-NEXT: lwz r4, 72(r1)
1521 ; SPE-NEXT: lwz r5, 76(r1)
1522 ; SPE-NEXT: lwz r7, 84(r1)
1523 ; SPE-NEXT: stw r22, 24(r1) # 4-byte Folded Spill
1524 ; SPE-NEXT: efsneg r22, r3
1525 ; SPE-NEXT: stw r23, 28(r1) # 4-byte Folded Spill
1526 ; SPE-NEXT: efsneg r23, r5
1527 ; SPE-NEXT: stw r24, 32(r1) # 4-byte Folded Spill
1528 ; SPE-NEXT: efsneg r24, r4
1529 ; SPE-NEXT: efsneg r5, r7
1530 ; SPE-NEXT: mr r3, r6
1531 ; SPE-NEXT: mr r4, r10
1532 ; SPE-NEXT: stw r21, 20(r1) # 4-byte Folded Spill
1533 ; SPE-NEXT: stw r29, 52(r1) # 4-byte Folded Spill
1534 ; SPE-NEXT: mr r29, r8
1535 ; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill
1536 ; SPE-NEXT: mr r30, r9
1538 ; SPE-NEXT: mr r21, r3
1539 ; SPE-NEXT: mr r3, r27
1540 ; SPE-NEXT: mr r4, r30
1541 ; SPE-NEXT: mr r5, r22
1543 ; SPE-NEXT: mr r30, r3
1544 ; SPE-NEXT: mr r3, r26
1545 ; SPE-NEXT: mr r4, r29
1546 ; SPE-NEXT: mr r5, r23
1548 ; SPE-NEXT: mr r29, r3
1549 ; SPE-NEXT: mr r3, r25
1550 ; SPE-NEXT: mr r4, r28
1551 ; SPE-NEXT: mr r5, r24
1553 ; SPE-NEXT: efsneg r4, r29
1554 ; SPE-NEXT: efsneg r5, r30
1555 ; SPE-NEXT: efsneg r3, r3
1556 ; SPE-NEXT: efsneg r6, r21
1557 ; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload
1558 ; SPE-NEXT: lwz r29, 52(r1) # 4-byte Folded Reload
1559 ; SPE-NEXT: lwz r28, 48(r1) # 4-byte Folded Reload
1560 ; SPE-NEXT: lwz r27, 44(r1) # 4-byte Folded Reload
1561 ; SPE-NEXT: lwz r26, 40(r1) # 4-byte Folded Reload
1562 ; SPE-NEXT: lwz r25, 36(r1) # 4-byte Folded Reload
1563 ; SPE-NEXT: lwz r24, 32(r1) # 4-byte Folded Reload
1564 ; SPE-NEXT: lwz r23, 28(r1) # 4-byte Folded Reload
1565 ; SPE-NEXT: lwz r22, 24(r1) # 4-byte Folded Reload
1566 ; SPE-NEXT: lwz r21, 20(r1) # 4-byte Folded Reload
1567 ; SPE-NEXT: lwz r0, 68(r1)
1568 ; SPE-NEXT: addi r1, r1, 64
1571 %neg = fneg <4 x float> %vf2
1572 %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
1573 <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
1574 metadata !"round.dynamic",
1575 metadata !"fpexcept.strict") #0
1576 %res = fneg <4 x float> %fma
1577 ret <4 x float> %res
; fnmsub_v2f64: vector (2 x f64) -(vf0*vf1 - vf2). VSX fuses to xvnmsubadp;
; no-VSX scalarizes into two fnmsub. SPE negates the incoming addend lanes
; (efdneg r27/r8) before the libcalls and the per-lane results afterwards,
; storing through the sret pointer held in r30. NOTE(review): bl call lines
; are elided in this listing.
1580 define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
1581 ; CHECK-LABEL: fnmsub_v2f64:
1583 ; CHECK-NEXT: xvnmsubadp v4, v2, v3
1584 ; CHECK-NEXT: vmr v2, v4
1587 ; NOVSX-LABEL: fnmsub_v2f64:
1589 ; NOVSX-NEXT: fnmsub f2, f2, f4, f6
1590 ; NOVSX-NEXT: fnmsub f1, f1, f3, f5
1593 ; SPE-LABEL: fnmsub_v2f64:
1596 ; SPE-NEXT: stwu r1, -80(r1)
1597 ; SPE-NEXT: stw r0, 84(r1)
1598 ; SPE-NEXT: .cfi_def_cfa_offset 80
1599 ; SPE-NEXT: .cfi_offset lr, 4
1600 ; SPE-NEXT: .cfi_offset r26, -64
1601 ; SPE-NEXT: .cfi_offset r27, -56
1602 ; SPE-NEXT: .cfi_offset r28, -48
1603 ; SPE-NEXT: .cfi_offset r29, -40
1604 ; SPE-NEXT: .cfi_offset r30, -8
1605 ; SPE-NEXT: stw r30, 72(r1) # 4-byte Folded Spill
1606 ; SPE-NEXT: mr r30, r3
1607 ; SPE-NEXT: evldd r3, 96(r1)
1608 ; SPE-NEXT: evldd r11, 104(r1)
1609 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
1610 ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
1611 ; SPE-NEXT: efdneg r27, r11
1612 ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
1613 ; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
1614 ; SPE-NEXT: evmergelo r29, r7, r8
1615 ; SPE-NEXT: evmergelo r9, r9, r10
1616 ; SPE-NEXT: evmergelo r4, r5, r6
1617 ; SPE-NEXT: efdneg r8, r3
1618 ; SPE-NEXT: evmergehi r3, r4, r4
1619 ; SPE-NEXT: evmergehi r5, r9, r9
1620 ; SPE-NEXT: evmergehi r7, r8, r8
1621 ; SPE-NEXT: mr r6, r9
1622 ; SPE-NEXT: evldd r28, 88(r1)
1624 ; SPE-NEXT: evmergelo r26, r3, r4
1625 ; SPE-NEXT: evmergehi r3, r29, r29
1626 ; SPE-NEXT: evmergehi r5, r28, r28
1627 ; SPE-NEXT: evmergehi r7, r27, r27
1628 ; SPE-NEXT: mr r4, r29
1629 ; SPE-NEXT: mr r6, r28
1630 ; SPE-NEXT: mr r8, r27
1632 ; SPE-NEXT: evmergelo r3, r3, r4
1633 ; SPE-NEXT: li r5, 8
1634 ; SPE-NEXT: efdneg r3, r3
1635 ; SPE-NEXT: evstddx r3, r30, r5
1636 ; SPE-NEXT: efdneg r3, r26
1637 ; SPE-NEXT: evstdd r3, 0(r30)
1638 ; SPE-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload
1639 ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
1640 ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
1641 ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
1642 ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
1643 ; SPE-NEXT: lwz r0, 84(r1)
1644 ; SPE-NEXT: addi r1, r1, 80
1647 %neg = fneg <2 x double> %vf2
1648 %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
1649 <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
1650 metadata !"round.dynamic",
1651 metadata !"fpexcept.strict") #0
1652 %res = fneg <2 x double> %fma
1653 ret <2 x double> %res
; fsqrt_f32: constrained (strict-fp) sqrt of a single float. VSX: xssqrtsp;
; no-VSX: fsqrts. SPE has no hardware sqrt and calls the sqrtf libcall.
; NOTE(review): the intrinsic's first operand line appears elided in this
; listing (between lines 1679 and 1681).
1656 define float @fsqrt_f32(float %f1) #0 {
1657 ; CHECK-LABEL: fsqrt_f32:
1659 ; CHECK-NEXT: xssqrtsp f1, f1
1662 ; NOVSX-LABEL: fsqrt_f32:
1664 ; NOVSX-NEXT: fsqrts f1, f1
1667 ; SPE-LABEL: fsqrt_f32:
1670 ; SPE-NEXT: stwu r1, -16(r1)
1671 ; SPE-NEXT: stw r0, 20(r1)
1672 ; SPE-NEXT: .cfi_def_cfa_offset 16
1673 ; SPE-NEXT: .cfi_offset lr, 4
1674 ; SPE-NEXT: bl sqrtf
1675 ; SPE-NEXT: lwz r0, 20(r1)
1676 ; SPE-NEXT: addi r1, r1, 16
1679 %res = call float @llvm.experimental.constrained.sqrt.f32(
1681 metadata !"round.dynamic",
1682 metadata !"fpexcept.strict") #0
; fsqrt_f64: constrained sqrt of a double. VSX: xssqrtdp; no-VSX: fsqrt. SPE
; packs r3/r4 into a 64-bit value with evmergelo, makes a libcall (bl line
; elided in this listing), and splits the result back into r3/r4 for the
; soft-double return.
1686 define double @fsqrt_f64(double %f1) #0 {
1687 ; CHECK-LABEL: fsqrt_f64:
1689 ; CHECK-NEXT: xssqrtdp f1, f1
1692 ; NOVSX-LABEL: fsqrt_f64:
1694 ; NOVSX-NEXT: fsqrt f1, f1
1697 ; SPE-LABEL: fsqrt_f64:
1700 ; SPE-NEXT: stwu r1, -16(r1)
1701 ; SPE-NEXT: stw r0, 20(r1)
1702 ; SPE-NEXT: .cfi_def_cfa_offset 16
1703 ; SPE-NEXT: .cfi_offset lr, 4
1704 ; SPE-NEXT: evmergelo r4, r3, r4
1705 ; SPE-NEXT: evmergehi r3, r4, r4
1707 ; SPE-NEXT: evmergelo r4, r3, r4
1708 ; SPE-NEXT: evmergehi r3, r4, r4
1709 ; SPE-NEXT: lwz r0, 20(r1)
1710 ; SPE-NEXT: addi r1, r1, 16
1713 %res = call double @llvm.experimental.constrained.sqrt.f64(
1715 metadata !"round.dynamic",
1716 metadata !"fpexcept.strict") #0
; fsqrt_v4f32: constrained sqrt of a 4 x f32 vector. VSX keeps it vectorized
; (xvsqrtsp). No-VSX scalarizes through the stack with four fsqrts. SPE makes
; four sqrtf libcalls, rotating the lane values through callee-saved r27-r30.
1720 define <4 x float> @fsqrt_v4f32(<4 x float> %vf1) #0 {
1721 ; CHECK-LABEL: fsqrt_v4f32:
1723 ; CHECK-NEXT: xvsqrtsp v2, v2
1726 ; NOVSX-LABEL: fsqrt_v4f32:
1728 ; NOVSX-NEXT: addi r3, r1, -32
1729 ; NOVSX-NEXT: stvx v2, 0, r3
1730 ; NOVSX-NEXT: addi r3, r1, -16
1731 ; NOVSX-NEXT: lfs f0, -20(r1)
1732 ; NOVSX-NEXT: fsqrts f0, f0
1733 ; NOVSX-NEXT: stfs f0, -4(r1)
1734 ; NOVSX-NEXT: lfs f0, -24(r1)
1735 ; NOVSX-NEXT: fsqrts f0, f0
1736 ; NOVSX-NEXT: stfs f0, -8(r1)
1737 ; NOVSX-NEXT: lfs f0, -28(r1)
1738 ; NOVSX-NEXT: fsqrts f0, f0
1739 ; NOVSX-NEXT: stfs f0, -12(r1)
1740 ; NOVSX-NEXT: lfs f0, -32(r1)
1741 ; NOVSX-NEXT: fsqrts f0, f0
1742 ; NOVSX-NEXT: stfs f0, -16(r1)
1743 ; NOVSX-NEXT: lvx v2, 0, r3
1746 ; SPE-LABEL: fsqrt_v4f32:
1749 ; SPE-NEXT: stwu r1, -32(r1)
1750 ; SPE-NEXT: stw r0, 36(r1)
1751 ; SPE-NEXT: .cfi_def_cfa_offset 32
1752 ; SPE-NEXT: .cfi_offset lr, 4
1753 ; SPE-NEXT: .cfi_offset r27, -20
1754 ; SPE-NEXT: .cfi_offset r28, -16
1755 ; SPE-NEXT: .cfi_offset r29, -12
1756 ; SPE-NEXT: .cfi_offset r30, -8
1757 ; SPE-NEXT: stw r28, 16(r1) # 4-byte Folded Spill
1758 ; SPE-NEXT: mr r28, r3
1759 ; SPE-NEXT: mr r3, r6
1760 ; SPE-NEXT: stw r27, 12(r1) # 4-byte Folded Spill
1761 ; SPE-NEXT: stw r29, 20(r1) # 4-byte Folded Spill
1762 ; SPE-NEXT: mr r29, r4
1763 ; SPE-NEXT: stw r30, 24(r1) # 4-byte Folded Spill
1764 ; SPE-NEXT: mr r30, r5
1765 ; SPE-NEXT: bl sqrtf
1766 ; SPE-NEXT: mr r27, r3
1767 ; SPE-NEXT: mr r3, r30
1768 ; SPE-NEXT: bl sqrtf
1769 ; SPE-NEXT: mr r30, r3
1770 ; SPE-NEXT: mr r3, r29
1771 ; SPE-NEXT: bl sqrtf
1772 ; SPE-NEXT: mr r29, r3
1773 ; SPE-NEXT: mr r3, r28
1774 ; SPE-NEXT: bl sqrtf
1775 ; SPE-NEXT: mr r4, r29
1776 ; SPE-NEXT: mr r5, r30
1777 ; SPE-NEXT: mr r6, r27
1778 ; SPE-NEXT: lwz r30, 24(r1) # 4-byte Folded Reload
1779 ; SPE-NEXT: lwz r29, 20(r1) # 4-byte Folded Reload
1780 ; SPE-NEXT: lwz r28, 16(r1) # 4-byte Folded Reload
1781 ; SPE-NEXT: lwz r27, 12(r1) # 4-byte Folded Reload
1782 ; SPE-NEXT: lwz r0, 36(r1)
1783 ; SPE-NEXT: addi r1, r1, 32
1786 %res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
1788 metadata !"round.dynamic",
1789 metadata !"fpexcept.strict") #0
1790 ret <4 x float> %res
; fsqrt_v2f64: constrained sqrt of a 2 x f64 vector. VSX: xvsqrtdp; no-VSX:
; two scalar fsqrt. SPE scalarizes into two libcalls (bl lines elided in this
; listing) and stores the lanes through the sret pointer in r30 at offsets 0
; and 8.
1793 define <2 x double> @fsqrt_v2f64(<2 x double> %vf1) #0 {
1794 ; CHECK-LABEL: fsqrt_v2f64:
1796 ; CHECK-NEXT: xvsqrtdp v2, v2
1799 ; NOVSX-LABEL: fsqrt_v2f64:
1801 ; NOVSX-NEXT: fsqrt f2, f2
1802 ; NOVSX-NEXT: fsqrt f1, f1
1805 ; SPE-LABEL: fsqrt_v2f64:
1808 ; SPE-NEXT: stwu r1, -64(r1)
1809 ; SPE-NEXT: stw r0, 68(r1)
1810 ; SPE-NEXT: .cfi_def_cfa_offset 64
1811 ; SPE-NEXT: .cfi_offset lr, 4
1812 ; SPE-NEXT: .cfi_offset r28, -48
1813 ; SPE-NEXT: .cfi_offset r29, -40
1814 ; SPE-NEXT: .cfi_offset r30, -8
1815 ; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill
1816 ; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill
1817 ; SPE-NEXT: stw r30, 56(r1) # 4-byte Folded Spill
1818 ; SPE-NEXT: evmergelo r29, r7, r8
1819 ; SPE-NEXT: evmergelo r4, r5, r6
1820 ; SPE-NEXT: mr r30, r3
1821 ; SPE-NEXT: evmergehi r3, r4, r4
1823 ; SPE-NEXT: evmergelo r28, r3, r4
1824 ; SPE-NEXT: evmergehi r3, r29, r29
1825 ; SPE-NEXT: mr r4, r29
1827 ; SPE-NEXT: li r5, 8
1828 ; SPE-NEXT: evmergelo r3, r3, r4
1829 ; SPE-NEXT: evstddx r3, r30, r5
1830 ; SPE-NEXT: evstdd r28, 0(r30)
1831 ; SPE-NEXT: lwz r30, 56(r1) # 4-byte Folded Reload
1832 ; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload
1833 ; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload
1834 ; SPE-NEXT: lwz r0, 68(r1)
1835 ; SPE-NEXT: addi r1, r1, 64
1838 %res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
1840 metadata !"round.dynamic",
1841 metadata !"fpexcept.strict") #0
1842 ret <2 x double> %res
1845 attributes #0 = { strictfp }