1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
3 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
4 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX
5 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc-unknown-linux -mattr=spe | FileCheck %s -check-prefix=SPE
; Declarations of the constrained (strict-FP) intrinsics exercised by the
; functions below. Each takes its operands plus two metadata arguments:
; the rounding mode ("round.dynamic" here) and the exception behavior
; ("fpexcept.strict" here). The sqrt variants are declared but their test
; bodies are not visible in this chunk.
7 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
8 declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
9 declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
10 declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
12 declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
13 declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
14 declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata)
15 declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
17 declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
18 declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
19 declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
20 declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
22 declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
23 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
24 declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
25 declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
27 declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
28 declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
29 declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
30 declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
32 declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
33 declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
34 declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
35 declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
; Strict f32 add: a single scalar add per configuration (xsaddsp with VSX,
; fadds without VSX, efsadd under SPE).
37 define float @fadd_f32(float %f1, float %f2) #0 {
38 ; CHECK-LABEL: fadd_f32:
40 ; CHECK-NEXT: xsaddsp f1, f1, f2
43 ; NOVSX-LABEL: fadd_f32:
45 ; NOVSX-NEXT: fadds f1, f1, f2
48 ; SPE-LABEL: fadd_f32:
50 ; SPE-NEXT: efsadd r3, r3, r4
52 %res = call float @llvm.experimental.constrained.fadd.f32(
54 metadata !"round.dynamic",
55 metadata !"fpexcept.strict") #0
; Strict f64 add. Under SPE the two halves of each double arrive in GPR
; pairs, so evmergelo/evmergehi reassemble 64-bit values around efdadd.
59 define double @fadd_f64(double %f1, double %f2) #0 {
60 ; CHECK-LABEL: fadd_f64:
62 ; CHECK-NEXT: xsadddp f1, f1, f2
65 ; NOVSX-LABEL: fadd_f64:
67 ; NOVSX-NEXT: fadd f1, f1, f2
70 ; SPE-LABEL: fadd_f64:
72 ; SPE-NEXT: evmergelo r5, r5, r6
73 ; SPE-NEXT: evmergelo r3, r3, r4
74 ; SPE-NEXT: efdadd r4, r3, r5
75 ; SPE-NEXT: evmergehi r3, r4, r4
76 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
77 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
79 %res = call double @llvm.experimental.constrained.fadd.f64(
80 double %f1, double %f2,
81 metadata !"round.dynamic",
82 metadata !"fpexcept.strict") #0
; Strict v4f32 add: one xvaddsp with VSX; without VSX the vector is spilled
; to the stack and each lane is added with scalar fadds; SPE uses four
; per-lane efsadd instructions on GPR-passed lanes.
86 define <4 x float> @fadd_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
87 ; CHECK-LABEL: fadd_v4f32:
89 ; CHECK-NEXT: xvaddsp v2, v2, v3
92 ; NOVSX-LABEL: fadd_v4f32:
94 ; NOVSX-NEXT: addi r3, r1, -32
95 ; NOVSX-NEXT: addi r4, r1, -48
96 ; NOVSX-NEXT: stvx v3, 0, r3
97 ; NOVSX-NEXT: stvx v2, 0, r4
98 ; NOVSX-NEXT: addi r3, r1, -16
99 ; NOVSX-NEXT: lfs f0, -20(r1)
100 ; NOVSX-NEXT: lfs f1, -36(r1)
101 ; NOVSX-NEXT: fadds f0, f1, f0
102 ; NOVSX-NEXT: lfs f1, -40(r1)
103 ; NOVSX-NEXT: stfs f0, -4(r1)
104 ; NOVSX-NEXT: lfs f0, -24(r1)
105 ; NOVSX-NEXT: fadds f0, f1, f0
106 ; NOVSX-NEXT: lfs f1, -44(r1)
107 ; NOVSX-NEXT: stfs f0, -8(r1)
108 ; NOVSX-NEXT: lfs f0, -28(r1)
109 ; NOVSX-NEXT: fadds f0, f1, f0
110 ; NOVSX-NEXT: lfs f1, -48(r1)
111 ; NOVSX-NEXT: stfs f0, -12(r1)
112 ; NOVSX-NEXT: lfs f0, -32(r1)
113 ; NOVSX-NEXT: fadds f0, f1, f0
114 ; NOVSX-NEXT: stfs f0, -16(r1)
115 ; NOVSX-NEXT: lvx v2, 0, r3
118 ; SPE-LABEL: fadd_v4f32:
120 ; SPE-NEXT: efsadd r6, r6, r10
121 ; SPE-NEXT: efsadd r5, r5, r9
122 ; SPE-NEXT: efsadd r4, r4, r8
123 ; SPE-NEXT: efsadd r3, r3, r7
125 %res = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(
126 <4 x float> %vf1, <4 x float> %vf2,
127 metadata !"round.dynamic",
128 metadata !"fpexcept.strict") #0
; Strict v2f64 add: one xvadddp with VSX, two scalar fadd without VSX.
; Under SPE the result is stored through r3 (evstdd/evstddx), which looks
; like an indirect (sret-style) vector return — TODO confirm against ABI.
132 define <2 x double> @fadd_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
133 ; CHECK-LABEL: fadd_v2f64:
135 ; CHECK-NEXT: xvadddp v2, v2, v3
138 ; NOVSX-LABEL: fadd_v2f64:
140 ; NOVSX-NEXT: fadd f2, f2, f4
141 ; NOVSX-NEXT: fadd f1, f1, f3
144 ; SPE-LABEL: fadd_v2f64:
146 ; SPE-NEXT: evldd r4, 8(r1)
147 ; SPE-NEXT: evmergelo r7, r7, r8
148 ; SPE-NEXT: evmergelo r8, r9, r10
150 ; SPE-NEXT: evmergelo r5, r5, r6
151 ; SPE-NEXT: efdadd r4, r7, r4
152 ; SPE-NEXT: evstddx r4, r3, r9
153 ; SPE-NEXT: efdadd r4, r5, r8
154 ; SPE-NEXT: evstdd r4, 0(r3)
156 %res = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
157 <2 x double> %vf1, <2 x double> %vf2,
158 metadata !"round.dynamic",
159 metadata !"fpexcept.strict") #0
160 ret <2 x double> %res
; Strict f32 subtract: single scalar instruction per configuration
; (xssubsp, fsubs, efssub).
163 define float @fsub_f32(float %f1, float %f2) #0 {
164 ; CHECK-LABEL: fsub_f32:
166 ; CHECK-NEXT: xssubsp f1, f1, f2
169 ; NOVSX-LABEL: fsub_f32:
171 ; NOVSX-NEXT: fsubs f1, f1, f2
174 ; SPE-LABEL: fsub_f32:
176 ; SPE-NEXT: efssub r3, r3, r4
179 %res = call float @llvm.experimental.constrained.fsub.f32(
180 float %f1, float %f2,
181 metadata !"round.dynamic",
182 metadata !"fpexcept.strict") #0
; Strict f64 subtract; SPE reassembles GPR pairs with evmergelo/evmergehi
; around efdsub, mirroring fadd_f64.
186 define double @fsub_f64(double %f1, double %f2) #0 {
187 ; CHECK-LABEL: fsub_f64:
189 ; CHECK-NEXT: xssubdp f1, f1, f2
192 ; NOVSX-LABEL: fsub_f64:
194 ; NOVSX-NEXT: fsub f1, f1, f2
197 ; SPE-LABEL: fsub_f64:
199 ; SPE-NEXT: evmergelo r5, r5, r6
200 ; SPE-NEXT: evmergelo r3, r3, r4
201 ; SPE-NEXT: efdsub r4, r3, r5
202 ; SPE-NEXT: evmergehi r3, r4, r4
203 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
204 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
207 %res = call double @llvm.experimental.constrained.fsub.f64(
208 double %f1, double %f2,
209 metadata !"round.dynamic",
210 metadata !"fpexcept.strict") #0
; Strict v4f32 subtract: xvsubsp with VSX; scalarized via stack spills
; without VSX; four per-lane efssub under SPE. Same shape as fadd_v4f32.
214 define <4 x float> @fsub_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
215 ; CHECK-LABEL: fsub_v4f32:
217 ; CHECK-NEXT: xvsubsp v2, v2, v3
220 ; NOVSX-LABEL: fsub_v4f32:
222 ; NOVSX-NEXT: addi r3, r1, -32
223 ; NOVSX-NEXT: addi r4, r1, -48
224 ; NOVSX-NEXT: stvx v3, 0, r3
225 ; NOVSX-NEXT: stvx v2, 0, r4
226 ; NOVSX-NEXT: addi r3, r1, -16
227 ; NOVSX-NEXT: lfs f0, -20(r1)
228 ; NOVSX-NEXT: lfs f1, -36(r1)
229 ; NOVSX-NEXT: fsubs f0, f1, f0
230 ; NOVSX-NEXT: lfs f1, -40(r1)
231 ; NOVSX-NEXT: stfs f0, -4(r1)
232 ; NOVSX-NEXT: lfs f0, -24(r1)
233 ; NOVSX-NEXT: fsubs f0, f1, f0
234 ; NOVSX-NEXT: lfs f1, -44(r1)
235 ; NOVSX-NEXT: stfs f0, -8(r1)
236 ; NOVSX-NEXT: lfs f0, -28(r1)
237 ; NOVSX-NEXT: fsubs f0, f1, f0
238 ; NOVSX-NEXT: lfs f1, -48(r1)
239 ; NOVSX-NEXT: stfs f0, -12(r1)
240 ; NOVSX-NEXT: lfs f0, -32(r1)
241 ; NOVSX-NEXT: fsubs f0, f1, f0
242 ; NOVSX-NEXT: stfs f0, -16(r1)
243 ; NOVSX-NEXT: lvx v2, 0, r3
246 ; SPE-LABEL: fsub_v4f32:
248 ; SPE-NEXT: efssub r6, r6, r10
249 ; SPE-NEXT: efssub r5, r5, r9
250 ; SPE-NEXT: efssub r4, r4, r8
251 ; SPE-NEXT: efssub r3, r3, r7
253 %res = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(
254 <4 x float> %vf1, <4 x float> %vf2,
255 metadata !"round.dynamic",
256 metadata !"fpexcept.strict") #0
257 ret <4 x float> %res;
; Strict v2f64 subtract: xvsubdp with VSX, two scalar fsub without VSX,
; efdsub plus stores through r3 under SPE (same shape as fadd_v2f64).
260 define <2 x double> @fsub_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
261 ; CHECK-LABEL: fsub_v2f64:
263 ; CHECK-NEXT: xvsubdp v2, v2, v3
266 ; NOVSX-LABEL: fsub_v2f64:
268 ; NOVSX-NEXT: fsub f2, f2, f4
269 ; NOVSX-NEXT: fsub f1, f1, f3
272 ; SPE-LABEL: fsub_v2f64:
274 ; SPE-NEXT: evldd r4, 8(r1)
275 ; SPE-NEXT: evmergelo r7, r7, r8
276 ; SPE-NEXT: evmergelo r8, r9, r10
278 ; SPE-NEXT: evmergelo r5, r5, r6
279 ; SPE-NEXT: efdsub r4, r7, r4
280 ; SPE-NEXT: evstddx r4, r3, r9
281 ; SPE-NEXT: efdsub r4, r5, r8
282 ; SPE-NEXT: evstdd r4, 0(r3)
284 %res = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
285 <2 x double> %vf1, <2 x double> %vf2,
286 metadata !"round.dynamic",
287 metadata !"fpexcept.strict") #0
288 ret <2 x double> %res;
; Strict f32 multiply: single scalar instruction per configuration
; (xsmulsp, fmuls, efsmul).
291 define float @fmul_f32(float %f1, float %f2) #0 {
292 ; CHECK-LABEL: fmul_f32:
294 ; CHECK-NEXT: xsmulsp f1, f1, f2
297 ; NOVSX-LABEL: fmul_f32:
299 ; NOVSX-NEXT: fmuls f1, f1, f2
302 ; SPE-LABEL: fmul_f32:
304 ; SPE-NEXT: efsmul r3, r3, r4
307 %res = call float @llvm.experimental.constrained.fmul.f32(
308 float %f1, float %f2,
309 metadata !"round.dynamic",
310 metadata !"fpexcept.strict") #0
; Strict f64 multiply; SPE reassembles GPR pairs around efdmul,
; mirroring fadd_f64/fsub_f64.
314 define double @fmul_f64(double %f1, double %f2) #0 {
315 ; CHECK-LABEL: fmul_f64:
317 ; CHECK-NEXT: xsmuldp f1, f1, f2
320 ; NOVSX-LABEL: fmul_f64:
322 ; NOVSX-NEXT: fmul f1, f1, f2
325 ; SPE-LABEL: fmul_f64:
327 ; SPE-NEXT: evmergelo r5, r5, r6
328 ; SPE-NEXT: evmergelo r3, r3, r4
329 ; SPE-NEXT: efdmul r4, r3, r5
330 ; SPE-NEXT: evmergehi r3, r4, r4
331 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
332 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
335 %res = call double @llvm.experimental.constrained.fmul.f64(
336 double %f1, double %f2,
337 metadata !"round.dynamic",
338 metadata !"fpexcept.strict") #0
; Strict v4f32 multiply: xvmulsp with VSX; scalarized via stack spills
; without VSX; four per-lane efsmul under SPE.
342 define <4 x float> @fmul_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
343 ; CHECK-LABEL: fmul_v4f32:
345 ; CHECK-NEXT: xvmulsp v2, v2, v3
348 ; NOVSX-LABEL: fmul_v4f32:
350 ; NOVSX-NEXT: addi r3, r1, -32
351 ; NOVSX-NEXT: addi r4, r1, -48
352 ; NOVSX-NEXT: stvx v3, 0, r3
353 ; NOVSX-NEXT: stvx v2, 0, r4
354 ; NOVSX-NEXT: addi r3, r1, -16
355 ; NOVSX-NEXT: lfs f0, -20(r1)
356 ; NOVSX-NEXT: lfs f1, -36(r1)
357 ; NOVSX-NEXT: fmuls f0, f1, f0
358 ; NOVSX-NEXT: lfs f1, -40(r1)
359 ; NOVSX-NEXT: stfs f0, -4(r1)
360 ; NOVSX-NEXT: lfs f0, -24(r1)
361 ; NOVSX-NEXT: fmuls f0, f1, f0
362 ; NOVSX-NEXT: lfs f1, -44(r1)
363 ; NOVSX-NEXT: stfs f0, -8(r1)
364 ; NOVSX-NEXT: lfs f0, -28(r1)
365 ; NOVSX-NEXT: fmuls f0, f1, f0
366 ; NOVSX-NEXT: lfs f1, -48(r1)
367 ; NOVSX-NEXT: stfs f0, -12(r1)
368 ; NOVSX-NEXT: lfs f0, -32(r1)
369 ; NOVSX-NEXT: fmuls f0, f1, f0
370 ; NOVSX-NEXT: stfs f0, -16(r1)
371 ; NOVSX-NEXT: lvx v2, 0, r3
374 ; SPE-LABEL: fmul_v4f32:
376 ; SPE-NEXT: efsmul r6, r6, r10
377 ; SPE-NEXT: efsmul r5, r5, r9
378 ; SPE-NEXT: efsmul r4, r4, r8
379 ; SPE-NEXT: efsmul r3, r3, r7
381 %res = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(
382 <4 x float> %vf1, <4 x float> %vf2,
383 metadata !"round.dynamic",
384 metadata !"fpexcept.strict") #0
385 ret <4 x float> %res;
; Strict v2f64 multiply: xvmuldp with VSX, two scalar fmul without VSX,
; efdmul plus stores through r3 under SPE.
388 define <2 x double> @fmul_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
389 ; CHECK-LABEL: fmul_v2f64:
391 ; CHECK-NEXT: xvmuldp v2, v2, v3
394 ; NOVSX-LABEL: fmul_v2f64:
396 ; NOVSX-NEXT: fmul f2, f2, f4
397 ; NOVSX-NEXT: fmul f1, f1, f3
400 ; SPE-LABEL: fmul_v2f64:
402 ; SPE-NEXT: evldd r4, 8(r1)
403 ; SPE-NEXT: evmergelo r7, r7, r8
404 ; SPE-NEXT: evmergelo r8, r9, r10
406 ; SPE-NEXT: evmergelo r5, r5, r6
407 ; SPE-NEXT: efdmul r4, r7, r4
408 ; SPE-NEXT: evstddx r4, r3, r9
409 ; SPE-NEXT: efdmul r4, r5, r8
410 ; SPE-NEXT: evstdd r4, 0(r3)
412 %res = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
413 <2 x double> %vf1, <2 x double> %vf2,
414 metadata !"round.dynamic",
415 metadata !"fpexcept.strict") #0
416 ret <2 x double> %res;
; Strict f32 divide: single scalar instruction per configuration
; (xsdivsp, fdivs, efsdiv).
419 define float @fdiv_f32(float %f1, float %f2) #0 {
420 ; CHECK-LABEL: fdiv_f32:
422 ; CHECK-NEXT: xsdivsp f1, f1, f2
425 ; NOVSX-LABEL: fdiv_f32:
427 ; NOVSX-NEXT: fdivs f1, f1, f2
430 ; SPE-LABEL: fdiv_f32:
432 ; SPE-NEXT: efsdiv r3, r3, r4
435 %res = call float @llvm.experimental.constrained.fdiv.f32(
436 float %f1, float %f2,
437 metadata !"round.dynamic",
438 metadata !"fpexcept.strict") #0
; Strict f64 divide; SPE reassembles GPR pairs around efddiv.
442 define double @fdiv_f64(double %f1, double %f2) #0 {
443 ; CHECK-LABEL: fdiv_f64:
445 ; CHECK-NEXT: xsdivdp f1, f1, f2
448 ; NOVSX-LABEL: fdiv_f64:
450 ; NOVSX-NEXT: fdiv f1, f1, f2
453 ; SPE-LABEL: fdiv_f64:
455 ; SPE-NEXT: evmergelo r5, r5, r6
456 ; SPE-NEXT: evmergelo r3, r3, r4
457 ; SPE-NEXT: efddiv r4, r3, r5
458 ; SPE-NEXT: evmergehi r3, r4, r4
459 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
460 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
463 %res = call double @llvm.experimental.constrained.fdiv.f64(
464 double %f1, double %f2,
465 metadata !"round.dynamic",
466 metadata !"fpexcept.strict") #0
; Strict v4f32 divide: xvdivsp with VSX; scalarized via stack spills
; without VSX; four per-lane efsdiv under SPE.
470 define <4 x float> @fdiv_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
471 ; CHECK-LABEL: fdiv_v4f32:
473 ; CHECK-NEXT: xvdivsp v2, v2, v3
476 ; NOVSX-LABEL: fdiv_v4f32:
478 ; NOVSX-NEXT: addi r3, r1, -32
479 ; NOVSX-NEXT: addi r4, r1, -48
480 ; NOVSX-NEXT: stvx v3, 0, r3
481 ; NOVSX-NEXT: stvx v2, 0, r4
482 ; NOVSX-NEXT: addi r3, r1, -16
483 ; NOVSX-NEXT: lfs f0, -20(r1)
484 ; NOVSX-NEXT: lfs f1, -36(r1)
485 ; NOVSX-NEXT: fdivs f0, f1, f0
486 ; NOVSX-NEXT: lfs f1, -40(r1)
487 ; NOVSX-NEXT: stfs f0, -4(r1)
488 ; NOVSX-NEXT: lfs f0, -24(r1)
489 ; NOVSX-NEXT: fdivs f0, f1, f0
490 ; NOVSX-NEXT: lfs f1, -44(r1)
491 ; NOVSX-NEXT: stfs f0, -8(r1)
492 ; NOVSX-NEXT: lfs f0, -28(r1)
493 ; NOVSX-NEXT: fdivs f0, f1, f0
494 ; NOVSX-NEXT: lfs f1, -48(r1)
495 ; NOVSX-NEXT: stfs f0, -12(r1)
496 ; NOVSX-NEXT: lfs f0, -32(r1)
497 ; NOVSX-NEXT: fdivs f0, f1, f0
498 ; NOVSX-NEXT: stfs f0, -16(r1)
499 ; NOVSX-NEXT: lvx v2, 0, r3
502 ; SPE-LABEL: fdiv_v4f32:
504 ; SPE-NEXT: efsdiv r6, r6, r10
505 ; SPE-NEXT: efsdiv r5, r5, r9
506 ; SPE-NEXT: efsdiv r4, r4, r8
507 ; SPE-NEXT: efsdiv r3, r3, r7
509 %res = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(
510 <4 x float> %vf1, <4 x float> %vf2,
511 metadata !"round.dynamic",
512 metadata !"fpexcept.strict") #0
; Strict v2f64 divide: xvdivdp with VSX, two scalar fdiv without VSX,
; efddiv plus stores through r3 under SPE. NOTE(review): unlike the
; add/sub/mul v2f64 tests the evstddx index here is r7 rather than r9 —
; presumably due to different register allocation; confirm on regeneration.
516 define <2 x double> @fdiv_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
517 ; CHECK-LABEL: fdiv_v2f64:
519 ; CHECK-NEXT: xvdivdp v2, v2, v3
522 ; NOVSX-LABEL: fdiv_v2f64:
524 ; NOVSX-NEXT: fdiv f2, f2, f4
525 ; NOVSX-NEXT: fdiv f1, f1, f3
528 ; SPE-LABEL: fdiv_v2f64:
530 ; SPE-NEXT: evldd r4, 8(r1)
531 ; SPE-NEXT: evmergelo r7, r7, r8
532 ; SPE-NEXT: evmergelo r8, r9, r10
533 ; SPE-NEXT: evmergelo r5, r5, r6
534 ; SPE-NEXT: efddiv r4, r7, r4
536 ; SPE-NEXT: evstddx r4, r3, r7
537 ; SPE-NEXT: efddiv r4, r5, r8
538 ; SPE-NEXT: evstdd r4, 0(r3)
540 %res = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
541 <2 x double> %vf1, <2 x double> %vf2,
542 metadata !"round.dynamic",
543 metadata !"fpexcept.strict") #0
544 ret <2 x double> %res
; Two separate strict operations (fmul then fadd) must NOT be contracted
; into a fused multiply-add: the checks require distinct multiply and add
; instructions on every configuration.
547 define double @no_fma_fold(double %f1, double %f2, double %f3) #0 {
548 ; CHECK-LABEL: no_fma_fold:
550 ; CHECK-NEXT: xsmuldp f0, f1, f2
551 ; CHECK-NEXT: xsadddp f1, f0, f3
554 ; NOVSX-LABEL: no_fma_fold:
556 ; NOVSX-NEXT: fmul f0, f1, f2
557 ; NOVSX-NEXT: fadd f1, f0, f3
560 ; SPE-LABEL: no_fma_fold:
562 ; SPE-NEXT: evmergelo r7, r7, r8
563 ; SPE-NEXT: evmergelo r5, r5, r6
564 ; SPE-NEXT: evmergelo r3, r3, r4
565 ; SPE-NEXT: efdmul r3, r3, r5
566 ; SPE-NEXT: efdadd r4, r3, r7
567 ; SPE-NEXT: evmergehi r3, r4, r4
568 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
569 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
571 %mul = call double @llvm.experimental.constrained.fmul.f64(
572 double %f1, double %f2,
573 metadata !"round.dynamic",
574 metadata !"fpexcept.strict") #0
575 %add = call double @llvm.experimental.constrained.fadd.f64(
576 double %mul, double %f3,
577 metadata !"round.dynamic",
578 metadata !"fpexcept.strict") #0
; Strict f32 fma: fused xsmaddasp/fmadds where an FPU exists. SPE has no
; fused single-precision op, so it sets up a stack frame — presumably for a
; libcall (the call line is elided from these autogenerated checks).
582 define float @fmadd_f32(float %f0, float %f1, float %f2) #0 {
583 ; CHECK-LABEL: fmadd_f32:
585 ; CHECK-NEXT: xsmaddasp f3, f1, f2
586 ; CHECK-NEXT: fmr f1, f3
589 ; NOVSX-LABEL: fmadd_f32:
591 ; NOVSX-NEXT: fmadds f1, f1, f2, f3
594 ; SPE-LABEL: fmadd_f32:
597 ; SPE-NEXT: stw r0, 4(r1)
598 ; SPE-NEXT: stwu r1, -16(r1)
599 ; SPE-NEXT: .cfi_def_cfa_offset 16
600 ; SPE-NEXT: .cfi_offset lr, 4
602 ; SPE-NEXT: lwz r0, 20(r1)
603 ; SPE-NEXT: addi r1, r1, 16
606 %res = call float @llvm.experimental.constrained.fma.f32(
607 float %f0, float %f1, float %f2,
608 metadata !"round.dynamic",
609 metadata !"fpexcept.strict") #0
; Strict f64 fma: fused xsmaddadp/fmadd where an FPU exists. SPE splits the
; three doubles into GPR halves and builds a call frame — presumably a
; libcall (call line elided); the result pair is merged back afterwards.
613 define double @fmadd_f64(double %f0, double %f1, double %f2) #0 {
614 ; CHECK-LABEL: fmadd_f64:
616 ; CHECK-NEXT: xsmaddadp f3, f1, f2
617 ; CHECK-NEXT: fmr f1, f3
620 ; NOVSX-LABEL: fmadd_f64:
622 ; NOVSX-NEXT: fmadd f1, f1, f2, f3
625 ; SPE-LABEL: fmadd_f64:
628 ; SPE-NEXT: stw r0, 4(r1)
629 ; SPE-NEXT: stwu r1, -16(r1)
630 ; SPE-NEXT: .cfi_def_cfa_offset 16
631 ; SPE-NEXT: .cfi_offset lr, 4
632 ; SPE-NEXT: evmergelo r8, r7, r8
633 ; SPE-NEXT: evmergelo r6, r5, r6
634 ; SPE-NEXT: evmergelo r4, r3, r4
635 ; SPE-NEXT: evmergehi r3, r4, r4
636 ; SPE-NEXT: evmergehi r5, r6, r6
637 ; SPE-NEXT: evmergehi r7, r8, r8
638 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
639 ; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6
640 ; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
641 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
642 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
643 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
645 ; SPE-NEXT: evmergelo r4, r3, r4
646 ; SPE-NEXT: evmergehi r3, r4, r4
647 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
648 ; SPE-NEXT: lwz r0, 20(r1)
649 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
650 ; SPE-NEXT: addi r1, r1, 16
653 %res = call double @llvm.experimental.constrained.fma.f64(
654 double %f0, double %f1, double %f2,
655 metadata !"round.dynamic",
656 metadata !"fpexcept.strict") #0
; Strict v4f32 fma: one xvmaddasp with VSX; scalarized fmadds via stack
; spills without VSX. SPE spills r21-r30 and shuffles lane values between
; calls — presumably four per-lane libcalls (call lines elided); confirm
; on regeneration.
660 define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
661 ; CHECK-LABEL: fmadd_v4f32:
663 ; CHECK-NEXT: xvmaddasp v4, v2, v3
664 ; CHECK-NEXT: vmr v2, v4
667 ; NOVSX-LABEL: fmadd_v4f32:
669 ; NOVSX-NEXT: addi r3, r1, -32
670 ; NOVSX-NEXT: addi r4, r1, -48
671 ; NOVSX-NEXT: stvx v4, 0, r3
672 ; NOVSX-NEXT: addi r3, r1, -64
673 ; NOVSX-NEXT: stvx v3, 0, r4
674 ; NOVSX-NEXT: stvx v2, 0, r3
675 ; NOVSX-NEXT: addi r3, r1, -16
676 ; NOVSX-NEXT: lfs f0, -20(r1)
677 ; NOVSX-NEXT: lfs f1, -36(r1)
678 ; NOVSX-NEXT: lfs f2, -52(r1)
679 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
680 ; NOVSX-NEXT: lfs f1, -40(r1)
681 ; NOVSX-NEXT: lfs f2, -56(r1)
682 ; NOVSX-NEXT: stfs f0, -4(r1)
683 ; NOVSX-NEXT: lfs f0, -24(r1)
684 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
685 ; NOVSX-NEXT: lfs f1, -44(r1)
686 ; NOVSX-NEXT: lfs f2, -60(r1)
687 ; NOVSX-NEXT: stfs f0, -8(r1)
688 ; NOVSX-NEXT: lfs f0, -28(r1)
689 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
690 ; NOVSX-NEXT: lfs f1, -48(r1)
691 ; NOVSX-NEXT: lfs f2, -64(r1)
692 ; NOVSX-NEXT: stfs f0, -12(r1)
693 ; NOVSX-NEXT: lfs f0, -32(r1)
694 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
695 ; NOVSX-NEXT: stfs f0, -16(r1)
696 ; NOVSX-NEXT: lvx v2, 0, r3
699 ; SPE-LABEL: fmadd_v4f32:
702 ; SPE-NEXT: stw r0, 4(r1)
703 ; SPE-NEXT: stwu r1, -96(r1)
704 ; SPE-NEXT: .cfi_def_cfa_offset 96
705 ; SPE-NEXT: .cfi_offset lr, 4
706 ; SPE-NEXT: .cfi_offset r21, -88
707 ; SPE-NEXT: .cfi_offset r22, -80
708 ; SPE-NEXT: .cfi_offset r23, -72
709 ; SPE-NEXT: .cfi_offset r24, -64
710 ; SPE-NEXT: .cfi_offset r25, -56
711 ; SPE-NEXT: .cfi_offset r26, -48
712 ; SPE-NEXT: .cfi_offset r27, -40
713 ; SPE-NEXT: .cfi_offset r28, -32
714 ; SPE-NEXT: .cfi_offset r29, -24
715 ; SPE-NEXT: .cfi_offset r30, -16
716 ; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill
717 ; SPE-NEXT: mr r27, r5
718 ; SPE-NEXT: lwz r5, 116(r1)
719 ; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill
720 ; SPE-NEXT: mr r25, r3
721 ; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill
722 ; SPE-NEXT: mr r26, r4
723 ; SPE-NEXT: mr r3, r6
724 ; SPE-NEXT: mr r4, r10
725 ; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill
726 ; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill
727 ; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill
728 ; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill
729 ; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill
730 ; SPE-NEXT: mr r28, r7
731 ; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill
732 ; SPE-NEXT: mr r29, r8
733 ; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill
734 ; SPE-NEXT: mr r30, r9
735 ; SPE-NEXT: lwz r24, 104(r1)
736 ; SPE-NEXT: lwz r23, 108(r1)
737 ; SPE-NEXT: lwz r22, 112(r1)
739 ; SPE-NEXT: mr r21, r3
740 ; SPE-NEXT: mr r3, r27
741 ; SPE-NEXT: mr r4, r30
742 ; SPE-NEXT: mr r5, r22
744 ; SPE-NEXT: mr r30, r3
745 ; SPE-NEXT: mr r3, r26
746 ; SPE-NEXT: mr r4, r29
747 ; SPE-NEXT: mr r5, r23
749 ; SPE-NEXT: mr r29, r3
750 ; SPE-NEXT: mr r3, r25
751 ; SPE-NEXT: mr r4, r28
752 ; SPE-NEXT: mr r5, r24
754 ; SPE-NEXT: mr r4, r29
755 ; SPE-NEXT: mr r5, r30
756 ; SPE-NEXT: mr r6, r21
757 ; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload
758 ; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload
759 ; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload
760 ; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload
761 ; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload
762 ; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload
763 ; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload
764 ; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload
765 ; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload
766 ; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload
767 ; SPE-NEXT: lwz r0, 100(r1)
768 ; SPE-NEXT: addi r1, r1, 96
771 %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
772 <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
773 metadata !"round.dynamic",
774 metadata !"fpexcept.strict") #0
; Strict v2f64 fma: one xvmaddadp with VSX; two scalar fmadd without VSX.
; SPE splits each element into GPR halves and sets up call frames —
; presumably per-element libcalls (call lines elided) — then stores the
; result through r30 (saved copy of the incoming r3 return pointer).
778 define <2 x double> @fmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
779 ; CHECK-LABEL: fmadd_v2f64:
781 ; CHECK-NEXT: xvmaddadp v4, v2, v3
782 ; CHECK-NEXT: vmr v2, v4
785 ; NOVSX-LABEL: fmadd_v2f64:
787 ; NOVSX-NEXT: fmadd f2, f2, f4, f6
788 ; NOVSX-NEXT: fmadd f1, f1, f3, f5
791 ; SPE-LABEL: fmadd_v2f64:
794 ; SPE-NEXT: stw r0, 4(r1)
795 ; SPE-NEXT: stwu r1, -64(r1)
796 ; SPE-NEXT: .cfi_def_cfa_offset 64
797 ; SPE-NEXT: .cfi_offset lr, 4
798 ; SPE-NEXT: .cfi_offset r26, -48
799 ; SPE-NEXT: .cfi_offset r27, -40
800 ; SPE-NEXT: .cfi_offset r28, -32
801 ; SPE-NEXT: .cfi_offset r29, -24
802 ; SPE-NEXT: .cfi_offset r30, -16
803 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
804 ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
805 ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
806 ; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
807 ; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill
808 ; SPE-NEXT: evmergelo r27, r7, r8
809 ; SPE-NEXT: evmergelo r9, r9, r10
810 ; SPE-NEXT: evmergelo r4, r5, r6
811 ; SPE-NEXT: mr r30, r3
812 ; SPE-NEXT: evldd r8, 80(r1)
813 ; SPE-NEXT: evmergehi r3, r4, r4
814 ; SPE-NEXT: evmergehi r5, r9, r9
815 ; SPE-NEXT: mr r6, r9
816 ; SPE-NEXT: evldd r29, 88(r1)
817 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
818 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
819 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
820 ; SPE-NEXT: evmergehi r7, r8, r8
821 ; SPE-NEXT: evldd r28, 72(r1)
822 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
823 ; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
825 ; SPE-NEXT: evmergelo r26, r3, r4
826 ; SPE-NEXT: evmergehi r3, r27, r27
827 ; SPE-NEXT: evmergehi r5, r28, r28
828 ; SPE-NEXT: evmergehi r7, r29, r29
829 ; SPE-NEXT: mr r4, r27
830 ; SPE-NEXT: mr r6, r28
831 ; SPE-NEXT: mr r8, r29
832 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
833 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
834 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
837 ; SPE-NEXT: evmergelo r3, r3, r4
838 ; SPE-NEXT: evstddx r3, r30, r5
839 ; SPE-NEXT: evstdd r26, 0(r30)
840 ; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload
841 ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
842 ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
843 ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
844 ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
845 ; SPE-NEXT: lwz r0, 68(r1)
846 ; SPE-NEXT: addi r1, r1, 64
849 %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
850 <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
851 metadata !"round.dynamic",
852 metadata !"fpexcept.strict") #0
853 ret <2 x double> %res
; fma with negated addend folds to fused multiply-subtract where an FPU
; exists (xsmsubasp / fmsubs). SPE keeps an explicit efsneg on the addend
; before the frame setup — presumably for an fma libcall (call elided).
856 define float @fmsub_f32(float %f0, float %f1, float %f2) #0 {
857 ; CHECK-LABEL: fmsub_f32:
859 ; CHECK-NEXT: xsmsubasp f3, f1, f2
860 ; CHECK-NEXT: fmr f1, f3
863 ; NOVSX-LABEL: fmsub_f32:
865 ; NOVSX-NEXT: fmsubs f1, f1, f2, f3
868 ; SPE-LABEL: fmsub_f32:
871 ; SPE-NEXT: stw r0, 4(r1)
872 ; SPE-NEXT: stwu r1, -16(r1)
873 ; SPE-NEXT: .cfi_def_cfa_offset 16
874 ; SPE-NEXT: .cfi_offset lr, 4
875 ; SPE-NEXT: efsneg r5, r5
877 ; SPE-NEXT: lwz r0, 20(r1)
878 ; SPE-NEXT: addi r1, r1, 16
881 %neg = fneg float %f2
882 %res = call float @llvm.experimental.constrained.fma.f32(
883 float %f0, float %f1, float %neg,
884 metadata !"round.dynamic",
885 metadata !"fpexcept.strict") #0
; f64 fma with negated addend: fused xsmsubadp / fmsub with an FPU; SPE
; applies an explicit efdneg to the reassembled addend pair before the
; (elided) call, then merges the result halves back.
889 define double @fmsub_f64(double %f0, double %f1, double %f2) #0 {
890 ; CHECK-LABEL: fmsub_f64:
892 ; CHECK-NEXT: xsmsubadp f3, f1, f2
893 ; CHECK-NEXT: fmr f1, f3
896 ; NOVSX-LABEL: fmsub_f64:
898 ; NOVSX-NEXT: fmsub f1, f1, f2, f3
901 ; SPE-LABEL: fmsub_f64:
904 ; SPE-NEXT: stw r0, 4(r1)
905 ; SPE-NEXT: stwu r1, -16(r1)
906 ; SPE-NEXT: .cfi_def_cfa_offset 16
907 ; SPE-NEXT: .cfi_offset lr, 4
908 ; SPE-NEXT: evmergelo r6, r5, r6
909 ; SPE-NEXT: evmergelo r4, r3, r4
910 ; SPE-NEXT: evmergelo r3, r7, r8
911 ; SPE-NEXT: efdneg r8, r3
912 ; SPE-NEXT: evmergehi r3, r4, r4
913 ; SPE-NEXT: evmergehi r5, r6, r6
914 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
915 ; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6
916 ; SPE-NEXT: evmergehi r7, r8, r8
917 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
918 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
919 ; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
920 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
922 ; SPE-NEXT: evmergelo r4, r3, r4
923 ; SPE-NEXT: evmergehi r3, r4, r4
924 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
925 ; SPE-NEXT: lwz r0, 20(r1)
926 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
927 ; SPE-NEXT: addi r1, r1, 16
930 %neg = fneg double %f2
931 %res = call double @llvm.experimental.constrained.fma.f64(
932 double %f0, double %f1, double %neg,
933 metadata !"round.dynamic",
934 metadata !"fpexcept.strict") #0
; v4f32 fma with negated addend: one xvmsubasp with VSX. Without VSX the
; addend vector is negated via vsubfp against a splat of -0.0 (built with
; vspltisb/vslw), then per-lane fmadds. SPE negates each lane with efsneg
; up front — presumably before per-lane libcalls (call lines elided).
938 define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
939 ; CHECK-LABEL: fmsub_v4f32:
941 ; CHECK-NEXT: xvmsubasp v4, v2, v3
942 ; CHECK-NEXT: vmr v2, v4
945 ; NOVSX-LABEL: fmsub_v4f32:
947 ; NOVSX-NEXT: vspltisb v5, -1
948 ; NOVSX-NEXT: addi r3, r1, -48
949 ; NOVSX-NEXT: addi r4, r1, -64
950 ; NOVSX-NEXT: stvx v3, 0, r3
951 ; NOVSX-NEXT: addi r3, r1, -32
952 ; NOVSX-NEXT: stvx v2, 0, r4
953 ; NOVSX-NEXT: vslw v5, v5, v5
954 ; NOVSX-NEXT: vsubfp v4, v5, v4
955 ; NOVSX-NEXT: stvx v4, 0, r3
956 ; NOVSX-NEXT: addi r3, r1, -16
957 ; NOVSX-NEXT: lfs f0, -36(r1)
958 ; NOVSX-NEXT: lfs f1, -52(r1)
959 ; NOVSX-NEXT: lfs f2, -20(r1)
960 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
961 ; NOVSX-NEXT: lfs f1, -56(r1)
962 ; NOVSX-NEXT: lfs f2, -24(r1)
963 ; NOVSX-NEXT: stfs f0, -4(r1)
964 ; NOVSX-NEXT: lfs f0, -40(r1)
965 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
966 ; NOVSX-NEXT: lfs f1, -60(r1)
967 ; NOVSX-NEXT: lfs f2, -28(r1)
968 ; NOVSX-NEXT: stfs f0, -8(r1)
969 ; NOVSX-NEXT: lfs f0, -44(r1)
970 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
971 ; NOVSX-NEXT: lfs f1, -64(r1)
972 ; NOVSX-NEXT: lfs f2, -32(r1)
973 ; NOVSX-NEXT: stfs f0, -12(r1)
974 ; NOVSX-NEXT: lfs f0, -48(r1)
975 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
976 ; NOVSX-NEXT: stfs f0, -16(r1)
977 ; NOVSX-NEXT: lvx v2, 0, r3
980 ; SPE-LABEL: fmsub_v4f32:
983 ; SPE-NEXT: stw r0, 4(r1)
984 ; SPE-NEXT: stwu r1, -96(r1)
985 ; SPE-NEXT: .cfi_def_cfa_offset 96
986 ; SPE-NEXT: .cfi_offset lr, 4
987 ; SPE-NEXT: .cfi_offset r21, -88
988 ; SPE-NEXT: .cfi_offset r22, -80
989 ; SPE-NEXT: .cfi_offset r23, -72
990 ; SPE-NEXT: .cfi_offset r24, -64
991 ; SPE-NEXT: .cfi_offset r25, -56
992 ; SPE-NEXT: .cfi_offset r26, -48
993 ; SPE-NEXT: .cfi_offset r27, -40
994 ; SPE-NEXT: .cfi_offset r28, -32
995 ; SPE-NEXT: .cfi_offset r29, -24
996 ; SPE-NEXT: .cfi_offset r30, -16
997 ; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill
998 ; SPE-NEXT: mr r25, r3
999 ; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill
1000 ; SPE-NEXT: mr r26, r4
1001 ; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill
1002 ; SPE-NEXT: mr r27, r5
1003 ; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill
1004 ; SPE-NEXT: mr r28, r7
1005 ; SPE-NEXT: lwz r3, 112(r1)
1006 ; SPE-NEXT: lwz r4, 104(r1)
1007 ; SPE-NEXT: lwz r5, 108(r1)
1008 ; SPE-NEXT: lwz r7, 116(r1)
1009 ; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill
1010 ; SPE-NEXT: efsneg r22, r3
1011 ; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill
1012 ; SPE-NEXT: efsneg r23, r5
1013 ; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill
1014 ; SPE-NEXT: efsneg r24, r4
1015 ; SPE-NEXT: efsneg r5, r7
1016 ; SPE-NEXT: mr r3, r6
1017 ; SPE-NEXT: mr r4, r10
1018 ; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill
1019 ; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill
1020 ; SPE-NEXT: mr r29, r8
1021 ; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill
1022 ; SPE-NEXT: mr r30, r9
1024 ; SPE-NEXT: mr r21, r3
1025 ; SPE-NEXT: mr r3, r27
1026 ; SPE-NEXT: mr r4, r30
1027 ; SPE-NEXT: mr r5, r22
1029 ; SPE-NEXT: mr r30, r3
1030 ; SPE-NEXT: mr r3, r26
1031 ; SPE-NEXT: mr r4, r29
1032 ; SPE-NEXT: mr r5, r23
1034 ; SPE-NEXT: mr r29, r3
1035 ; SPE-NEXT: mr r3, r25
1036 ; SPE-NEXT: mr r4, r28
1037 ; SPE-NEXT: mr r5, r24
1039 ; SPE-NEXT: mr r4, r29
1040 ; SPE-NEXT: mr r5, r30
1041 ; SPE-NEXT: mr r6, r21
1042 ; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload
1043 ; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload
1044 ; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload
1045 ; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload
1046 ; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload
1047 ; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload
1048 ; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload
1049 ; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload
1050 ; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload
1051 ; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload
1052 ; SPE-NEXT: lwz r0, 100(r1)
1053 ; SPE-NEXT: addi r1, r1, 96
1056 %neg = fneg <4 x float> %vf2
1057 %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
1058 <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
1059 metadata !"round.dynamic",
1060 metadata !"fpexcept.strict") #0
1061 ret <4 x float> %res
1064 define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
1065 ; CHECK-LABEL: fmsub_v2f64:
1067 ; CHECK-NEXT: xvmsubadp v4, v2, v3
1068 ; CHECK-NEXT: vmr v2, v4
1071 ; NOVSX-LABEL: fmsub_v2f64:
1073 ; NOVSX-NEXT: fmsub f2, f2, f4, f6
1074 ; NOVSX-NEXT: fmsub f1, f1, f3, f5
1077 ; SPE-LABEL: fmsub_v2f64:
1080 ; SPE-NEXT: stw r0, 4(r1)
1081 ; SPE-NEXT: stwu r1, -64(r1)
1082 ; SPE-NEXT: .cfi_def_cfa_offset 64
1083 ; SPE-NEXT: .cfi_offset lr, 4
1084 ; SPE-NEXT: .cfi_offset r26, -48
1085 ; SPE-NEXT: .cfi_offset r27, -40
1086 ; SPE-NEXT: .cfi_offset r28, -32
1087 ; SPE-NEXT: .cfi_offset r29, -24
1088 ; SPE-NEXT: .cfi_offset r30, -16
1089 ; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill
1090 ; SPE-NEXT: mr r30, r3
1091 ; SPE-NEXT: evldd r3, 80(r1)
1092 ; SPE-NEXT: evldd r11, 88(r1)
1093 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
1094 ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
1095 ; SPE-NEXT: efdneg r27, r11
1096 ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
1097 ; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
1098 ; SPE-NEXT: evmergelo r29, r7, r8
1099 ; SPE-NEXT: evmergelo r9, r9, r10
1100 ; SPE-NEXT: evmergelo r4, r5, r6
1101 ; SPE-NEXT: efdneg r8, r3
1102 ; SPE-NEXT: evmergehi r3, r4, r4
1103 ; SPE-NEXT: evmergehi r5, r9, r9
1104 ; SPE-NEXT: evmergehi r7, r8, r8
1105 ; SPE-NEXT: mr r6, r9
1106 ; SPE-NEXT: evldd r28, 72(r1)
1107 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1108 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
1109 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
1110 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
1111 ; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
1113 ; SPE-NEXT: evmergelo r26, r3, r4
1114 ; SPE-NEXT: evmergehi r3, r29, r29
1115 ; SPE-NEXT: evmergehi r5, r28, r28
1116 ; SPE-NEXT: evmergehi r7, r27, r27
1117 ; SPE-NEXT: mr r4, r29
1118 ; SPE-NEXT: mr r6, r28
1119 ; SPE-NEXT: mr r8, r27
1120 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1121 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
1122 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
1124 ; SPE-NEXT: li r5, 8
1125 ; SPE-NEXT: evmergelo r3, r3, r4
1126 ; SPE-NEXT: evstddx r3, r30, r5
1127 ; SPE-NEXT: evstdd r26, 0(r30)
1128 ; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload
1129 ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
1130 ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
1131 ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
1132 ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
1133 ; SPE-NEXT: lwz r0, 68(r1)
1134 ; SPE-NEXT: addi r1, r1, 64
1137 %neg = fneg <2 x double> %vf2
1138 %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
1139 <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
1140 metadata !"round.dynamic",
1141 metadata !"fpexcept.strict") #0
1142 ret <2 x double> %res
1145 define float @fnmadd_f32(float %f0, float %f1, float %f2) #0 {
1146 ; CHECK-LABEL: fnmadd_f32:
1148 ; CHECK-NEXT: xsnmaddasp f3, f1, f2
1149 ; CHECK-NEXT: fmr f1, f3
1152 ; NOVSX-LABEL: fnmadd_f32:
1154 ; NOVSX-NEXT: fnmadds f1, f1, f2, f3
1157 ; SPE-LABEL: fnmadd_f32:
1160 ; SPE-NEXT: stw r0, 4(r1)
1161 ; SPE-NEXT: stwu r1, -16(r1)
1162 ; SPE-NEXT: .cfi_def_cfa_offset 16
1163 ; SPE-NEXT: .cfi_offset lr, 4
1165 ; SPE-NEXT: efsneg r3, r3
1166 ; SPE-NEXT: lwz r0, 20(r1)
1167 ; SPE-NEXT: addi r1, r1, 16
1170 %fma = call float @llvm.experimental.constrained.fma.f32(
1171 float %f0, float %f1, float %f2,
1172 metadata !"round.dynamic",
1173 metadata !"fpexcept.strict") #0
1174 %res = fneg float %fma
1178 define double @fnmadd_f64(double %f0, double %f1, double %f2) #0 {
1179 ; CHECK-LABEL: fnmadd_f64:
1181 ; CHECK-NEXT: xsnmaddadp f3, f1, f2
1182 ; CHECK-NEXT: fmr f1, f3
1185 ; NOVSX-LABEL: fnmadd_f64:
1187 ; NOVSX-NEXT: fnmadd f1, f1, f2, f3
1190 ; SPE-LABEL: fnmadd_f64:
1193 ; SPE-NEXT: stw r0, 4(r1)
1194 ; SPE-NEXT: stwu r1, -16(r1)
1195 ; SPE-NEXT: .cfi_def_cfa_offset 16
1196 ; SPE-NEXT: .cfi_offset lr, 4
1197 ; SPE-NEXT: evmergelo r8, r7, r8
1198 ; SPE-NEXT: evmergelo r6, r5, r6
1199 ; SPE-NEXT: evmergelo r4, r3, r4
1200 ; SPE-NEXT: evmergehi r3, r4, r4
1201 ; SPE-NEXT: evmergehi r5, r6, r6
1202 ; SPE-NEXT: evmergehi r7, r8, r8
1203 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
1204 ; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6
1205 ; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
1206 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1207 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
1208 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
1210 ; SPE-NEXT: evmergelo r3, r3, r4
1211 ; SPE-NEXT: efdneg r4, r3
1212 ; SPE-NEXT: evmergehi r3, r4, r4
1213 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
1214 ; SPE-NEXT: lwz r0, 20(r1)
1215 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1216 ; SPE-NEXT: addi r1, r1, 16
1219 %fma = call double @llvm.experimental.constrained.fma.f64(
1220 double %f0, double %f1, double %f2,
1221 metadata !"round.dynamic",
1222 metadata !"fpexcept.strict") #0
1223 %res = fneg double %fma
1227 define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
1228 ; CHECK-LABEL: fnmadd_v4f32:
1230 ; CHECK-NEXT: xvmaddasp v4, v2, v3
1231 ; CHECK-NEXT: xvnegsp v2, v4
1234 ; NOVSX-LABEL: fnmadd_v4f32:
1236 ; NOVSX-NEXT: addi r3, r1, -32
1237 ; NOVSX-NEXT: addi r4, r1, -48
1238 ; NOVSX-NEXT: stvx v4, 0, r3
1239 ; NOVSX-NEXT: addi r3, r1, -64
1240 ; NOVSX-NEXT: stvx v3, 0, r4
1241 ; NOVSX-NEXT: stvx v2, 0, r3
1242 ; NOVSX-NEXT: vspltisb v2, -1
1243 ; NOVSX-NEXT: addi r3, r1, -16
1244 ; NOVSX-NEXT: lfs f0, -20(r1)
1245 ; NOVSX-NEXT: lfs f1, -36(r1)
1246 ; NOVSX-NEXT: lfs f2, -52(r1)
1247 ; NOVSX-NEXT: vslw v2, v2, v2
1248 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
1249 ; NOVSX-NEXT: lfs f1, -40(r1)
1250 ; NOVSX-NEXT: lfs f2, -56(r1)
1251 ; NOVSX-NEXT: stfs f0, -4(r1)
1252 ; NOVSX-NEXT: lfs f0, -24(r1)
1253 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
1254 ; NOVSX-NEXT: lfs f1, -44(r1)
1255 ; NOVSX-NEXT: lfs f2, -60(r1)
1256 ; NOVSX-NEXT: stfs f0, -8(r1)
1257 ; NOVSX-NEXT: lfs f0, -28(r1)
1258 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
1259 ; NOVSX-NEXT: lfs f1, -48(r1)
1260 ; NOVSX-NEXT: lfs f2, -64(r1)
1261 ; NOVSX-NEXT: stfs f0, -12(r1)
1262 ; NOVSX-NEXT: lfs f0, -32(r1)
1263 ; NOVSX-NEXT: fmadds f0, f2, f1, f0
1264 ; NOVSX-NEXT: stfs f0, -16(r1)
1265 ; NOVSX-NEXT: lvx v3, 0, r3
1266 ; NOVSX-NEXT: vsubfp v2, v2, v3
1269 ; SPE-LABEL: fnmadd_v4f32:
1272 ; SPE-NEXT: stw r0, 4(r1)
1273 ; SPE-NEXT: stwu r1, -96(r1)
1274 ; SPE-NEXT: .cfi_def_cfa_offset 96
1275 ; SPE-NEXT: .cfi_offset lr, 4
1276 ; SPE-NEXT: .cfi_offset r21, -88
1277 ; SPE-NEXT: .cfi_offset r22, -80
1278 ; SPE-NEXT: .cfi_offset r23, -72
1279 ; SPE-NEXT: .cfi_offset r24, -64
1280 ; SPE-NEXT: .cfi_offset r25, -56
1281 ; SPE-NEXT: .cfi_offset r26, -48
1282 ; SPE-NEXT: .cfi_offset r27, -40
1283 ; SPE-NEXT: .cfi_offset r28, -32
1284 ; SPE-NEXT: .cfi_offset r29, -24
1285 ; SPE-NEXT: .cfi_offset r30, -16
1286 ; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill
1287 ; SPE-NEXT: mr r27, r5
1288 ; SPE-NEXT: lwz r5, 116(r1)
1289 ; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill
1290 ; SPE-NEXT: mr r25, r3
1291 ; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill
1292 ; SPE-NEXT: mr r26, r4
1293 ; SPE-NEXT: mr r3, r6
1294 ; SPE-NEXT: mr r4, r10
1295 ; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill
1296 ; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill
1297 ; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill
1298 ; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill
1299 ; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill
1300 ; SPE-NEXT: mr r28, r7
1301 ; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill
1302 ; SPE-NEXT: mr r29, r8
1303 ; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill
1304 ; SPE-NEXT: mr r30, r9
1305 ; SPE-NEXT: lwz r24, 104(r1)
1306 ; SPE-NEXT: lwz r23, 108(r1)
1307 ; SPE-NEXT: lwz r22, 112(r1)
1309 ; SPE-NEXT: mr r21, r3
1310 ; SPE-NEXT: mr r3, r27
1311 ; SPE-NEXT: mr r4, r30
1312 ; SPE-NEXT: mr r5, r22
1314 ; SPE-NEXT: mr r30, r3
1315 ; SPE-NEXT: mr r3, r26
1316 ; SPE-NEXT: mr r4, r29
1317 ; SPE-NEXT: mr r5, r23
1319 ; SPE-NEXT: mr r29, r3
1320 ; SPE-NEXT: mr r3, r25
1321 ; SPE-NEXT: mr r4, r28
1322 ; SPE-NEXT: mr r5, r24
1324 ; SPE-NEXT: efsneg r4, r29
1325 ; SPE-NEXT: efsneg r5, r30
1326 ; SPE-NEXT: efsneg r3, r3
1327 ; SPE-NEXT: efsneg r6, r21
1328 ; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload
1329 ; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload
1330 ; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload
1331 ; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload
1332 ; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload
1333 ; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload
1334 ; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload
1335 ; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload
1336 ; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload
1337 ; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload
1338 ; SPE-NEXT: lwz r0, 100(r1)
1339 ; SPE-NEXT: addi r1, r1, 96
1342 %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
1343 <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
1344 metadata !"round.dynamic",
1345 metadata !"fpexcept.strict") #0
1346 %res = fneg <4 x float> %fma
1347 ret <4 x float> %res
1350 define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
1351 ; CHECK-LABEL: fnmadd_v2f64:
1353 ; CHECK-NEXT: xvnmaddadp v4, v2, v3
1354 ; CHECK-NEXT: vmr v2, v4
1357 ; NOVSX-LABEL: fnmadd_v2f64:
1359 ; NOVSX-NEXT: fnmadd f2, f2, f4, f6
1360 ; NOVSX-NEXT: fnmadd f1, f1, f3, f5
1363 ; SPE-LABEL: fnmadd_v2f64:
1366 ; SPE-NEXT: stw r0, 4(r1)
1367 ; SPE-NEXT: stwu r1, -64(r1)
1368 ; SPE-NEXT: .cfi_def_cfa_offset 64
1369 ; SPE-NEXT: .cfi_offset lr, 4
1370 ; SPE-NEXT: .cfi_offset r26, -48
1371 ; SPE-NEXT: .cfi_offset r27, -40
1372 ; SPE-NEXT: .cfi_offset r28, -32
1373 ; SPE-NEXT: .cfi_offset r29, -24
1374 ; SPE-NEXT: .cfi_offset r30, -16
1375 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
1376 ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
1377 ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
1378 ; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
1379 ; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill
1380 ; SPE-NEXT: evmergelo r27, r7, r8
1381 ; SPE-NEXT: evmergelo r9, r9, r10
1382 ; SPE-NEXT: evmergelo r4, r5, r6
1383 ; SPE-NEXT: mr r30, r3
1384 ; SPE-NEXT: evldd r8, 80(r1)
1385 ; SPE-NEXT: evmergehi r3, r4, r4
1386 ; SPE-NEXT: evmergehi r5, r9, r9
1387 ; SPE-NEXT: mr r6, r9
1388 ; SPE-NEXT: evldd r29, 88(r1)
1389 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1390 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
1391 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
1392 ; SPE-NEXT: evmergehi r7, r8, r8
1393 ; SPE-NEXT: evldd r28, 72(r1)
1394 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
1395 ; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
1397 ; SPE-NEXT: evmergelo r26, r3, r4
1398 ; SPE-NEXT: evmergehi r3, r27, r27
1399 ; SPE-NEXT: evmergehi r5, r28, r28
1400 ; SPE-NEXT: evmergehi r7, r29, r29
1401 ; SPE-NEXT: mr r4, r27
1402 ; SPE-NEXT: mr r6, r28
1403 ; SPE-NEXT: mr r8, r29
1404 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1405 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
1406 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
1408 ; SPE-NEXT: evmergelo r3, r3, r4
1409 ; SPE-NEXT: li r5, 8
1410 ; SPE-NEXT: efdneg r3, r3
1411 ; SPE-NEXT: evstddx r3, r30, r5
1412 ; SPE-NEXT: efdneg r3, r26
1413 ; SPE-NEXT: evstdd r3, 0(r30)
1414 ; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload
1415 ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
1416 ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
1417 ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
1418 ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
1419 ; SPE-NEXT: lwz r0, 68(r1)
1420 ; SPE-NEXT: addi r1, r1, 64
1423 %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
1424 <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
1425 metadata !"round.dynamic",
1426 metadata !"fpexcept.strict") #0
1427 %res = fneg <2 x double> %fma
1428 ret <2 x double> %res
1431 define float @fnmsub_f32(float %f0, float %f1, float %f2) #0 {
1432 ; CHECK-LABEL: fnmsub_f32:
1434 ; CHECK-NEXT: xsnmsubasp f3, f1, f2
1435 ; CHECK-NEXT: fmr f1, f3
1438 ; NOVSX-LABEL: fnmsub_f32:
1440 ; NOVSX-NEXT: fnmsubs f1, f1, f2, f3
1443 ; SPE-LABEL: fnmsub_f32:
1446 ; SPE-NEXT: stw r0, 4(r1)
1447 ; SPE-NEXT: stwu r1, -16(r1)
1448 ; SPE-NEXT: .cfi_def_cfa_offset 16
1449 ; SPE-NEXT: .cfi_offset lr, 4
1450 ; SPE-NEXT: efsneg r5, r5
1452 ; SPE-NEXT: efsneg r3, r3
1453 ; SPE-NEXT: lwz r0, 20(r1)
1454 ; SPE-NEXT: addi r1, r1, 16
1457 %neg = fneg float %f2
1458 %fma = call float @llvm.experimental.constrained.fma.f32(
1459 float %f0, float %f1, float %neg,
1460 metadata !"round.dynamic",
1461 metadata !"fpexcept.strict") #0
1462 %res = fneg float %fma
1466 define double @fnmsub_f64(double %f0, double %f1, double %f2) #0 {
1467 ; CHECK-LABEL: fnmsub_f64:
1469 ; CHECK-NEXT: xsnmsubadp f3, f1, f2
1470 ; CHECK-NEXT: fmr f1, f3
1473 ; NOVSX-LABEL: fnmsub_f64:
1475 ; NOVSX-NEXT: fnmsub f1, f1, f2, f3
1478 ; SPE-LABEL: fnmsub_f64:
1481 ; SPE-NEXT: stw r0, 4(r1)
1482 ; SPE-NEXT: stwu r1, -16(r1)
1483 ; SPE-NEXT: .cfi_def_cfa_offset 16
1484 ; SPE-NEXT: .cfi_offset lr, 4
1485 ; SPE-NEXT: evmergelo r6, r5, r6
1486 ; SPE-NEXT: evmergelo r4, r3, r4
1487 ; SPE-NEXT: evmergelo r3, r7, r8
1488 ; SPE-NEXT: efdneg r8, r3
1489 ; SPE-NEXT: evmergehi r3, r4, r4
1490 ; SPE-NEXT: evmergehi r5, r6, r6
1491 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
1492 ; SPE-NEXT: # kill: def $r6 killed $r6 killed $s6
1493 ; SPE-NEXT: evmergehi r7, r8, r8
1494 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1495 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
1496 ; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
1497 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
1499 ; SPE-NEXT: evmergelo r3, r3, r4
1500 ; SPE-NEXT: efdneg r4, r3
1501 ; SPE-NEXT: evmergehi r3, r4, r4
1502 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
1503 ; SPE-NEXT: lwz r0, 20(r1)
1504 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1505 ; SPE-NEXT: addi r1, r1, 16
1508 %neg = fneg double %f2
1509 %fma = call double @llvm.experimental.constrained.fma.f64(
1510 double %f0, double %f1, double %neg,
1511 metadata !"round.dynamic",
1512 metadata !"fpexcept.strict") #0
1513 %res = fneg double %fma
1517 define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
1518 ; CHECK-LABEL: fnmsub_v4f32:
1520 ; CHECK-NEXT: xvnmsubasp v4, v2, v3
1521 ; CHECK-NEXT: vmr v2, v4
1524 ; NOVSX-LABEL: fnmsub_v4f32:
1526 ; NOVSX-NEXT: vspltisb v5, -1
1527 ; NOVSX-NEXT: addi r3, r1, -48
1528 ; NOVSX-NEXT: addi r4, r1, -64
1529 ; NOVSX-NEXT: stvx v3, 0, r3
1530 ; NOVSX-NEXT: addi r3, r1, -32
1531 ; NOVSX-NEXT: stvx v2, 0, r4
1532 ; NOVSX-NEXT: vslw v5, v5, v5
1533 ; NOVSX-NEXT: vsubfp v4, v5, v4
1534 ; NOVSX-NEXT: stvx v4, 0, r3
1535 ; NOVSX-NEXT: addi r3, r1, -16
1536 ; NOVSX-NEXT: lfs f0, -36(r1)
1537 ; NOVSX-NEXT: lfs f1, -52(r1)
1538 ; NOVSX-NEXT: lfs f2, -20(r1)
1539 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
1540 ; NOVSX-NEXT: lfs f1, -56(r1)
1541 ; NOVSX-NEXT: lfs f2, -24(r1)
1542 ; NOVSX-NEXT: stfs f0, -4(r1)
1543 ; NOVSX-NEXT: lfs f0, -40(r1)
1544 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
1545 ; NOVSX-NEXT: lfs f1, -60(r1)
1546 ; NOVSX-NEXT: lfs f2, -28(r1)
1547 ; NOVSX-NEXT: stfs f0, -8(r1)
1548 ; NOVSX-NEXT: lfs f0, -44(r1)
1549 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
1550 ; NOVSX-NEXT: lfs f1, -64(r1)
1551 ; NOVSX-NEXT: lfs f2, -32(r1)
1552 ; NOVSX-NEXT: stfs f0, -12(r1)
1553 ; NOVSX-NEXT: lfs f0, -48(r1)
1554 ; NOVSX-NEXT: fmadds f0, f1, f0, f2
1555 ; NOVSX-NEXT: stfs f0, -16(r1)
1556 ; NOVSX-NEXT: lvx v2, 0, r3
1557 ; NOVSX-NEXT: vsubfp v2, v5, v2
1560 ; SPE-LABEL: fnmsub_v4f32:
1563 ; SPE-NEXT: stw r0, 4(r1)
1564 ; SPE-NEXT: stwu r1, -96(r1)
1565 ; SPE-NEXT: .cfi_def_cfa_offset 96
1566 ; SPE-NEXT: .cfi_offset lr, 4
1567 ; SPE-NEXT: .cfi_offset r21, -88
1568 ; SPE-NEXT: .cfi_offset r22, -80
1569 ; SPE-NEXT: .cfi_offset r23, -72
1570 ; SPE-NEXT: .cfi_offset r24, -64
1571 ; SPE-NEXT: .cfi_offset r25, -56
1572 ; SPE-NEXT: .cfi_offset r26, -48
1573 ; SPE-NEXT: .cfi_offset r27, -40
1574 ; SPE-NEXT: .cfi_offset r28, -32
1575 ; SPE-NEXT: .cfi_offset r29, -24
1576 ; SPE-NEXT: .cfi_offset r30, -16
1577 ; SPE-NEXT: evstdd r25, 40(r1) # 8-byte Folded Spill
1578 ; SPE-NEXT: mr r25, r3
1579 ; SPE-NEXT: evstdd r26, 48(r1) # 8-byte Folded Spill
1580 ; SPE-NEXT: mr r26, r4
1581 ; SPE-NEXT: evstdd r27, 56(r1) # 8-byte Folded Spill
1582 ; SPE-NEXT: mr r27, r5
1583 ; SPE-NEXT: evstdd r28, 64(r1) # 8-byte Folded Spill
1584 ; SPE-NEXT: mr r28, r7
1585 ; SPE-NEXT: lwz r3, 112(r1)
1586 ; SPE-NEXT: lwz r4, 104(r1)
1587 ; SPE-NEXT: lwz r5, 108(r1)
1588 ; SPE-NEXT: lwz r7, 116(r1)
1589 ; SPE-NEXT: evstdd r22, 16(r1) # 8-byte Folded Spill
1590 ; SPE-NEXT: efsneg r22, r3
1591 ; SPE-NEXT: evstdd r23, 24(r1) # 8-byte Folded Spill
1592 ; SPE-NEXT: efsneg r23, r5
1593 ; SPE-NEXT: evstdd r24, 32(r1) # 8-byte Folded Spill
1594 ; SPE-NEXT: efsneg r24, r4
1595 ; SPE-NEXT: efsneg r5, r7
1596 ; SPE-NEXT: mr r3, r6
1597 ; SPE-NEXT: mr r4, r10
1598 ; SPE-NEXT: evstdd r21, 8(r1) # 8-byte Folded Spill
1599 ; SPE-NEXT: evstdd r29, 72(r1) # 8-byte Folded Spill
1600 ; SPE-NEXT: mr r29, r8
1601 ; SPE-NEXT: evstdd r30, 80(r1) # 8-byte Folded Spill
1602 ; SPE-NEXT: mr r30, r9
1604 ; SPE-NEXT: mr r21, r3
1605 ; SPE-NEXT: mr r3, r27
1606 ; SPE-NEXT: mr r4, r30
1607 ; SPE-NEXT: mr r5, r22
1609 ; SPE-NEXT: mr r30, r3
1610 ; SPE-NEXT: mr r3, r26
1611 ; SPE-NEXT: mr r4, r29
1612 ; SPE-NEXT: mr r5, r23
1614 ; SPE-NEXT: mr r29, r3
1615 ; SPE-NEXT: mr r3, r25
1616 ; SPE-NEXT: mr r4, r28
1617 ; SPE-NEXT: mr r5, r24
1619 ; SPE-NEXT: efsneg r4, r29
1620 ; SPE-NEXT: efsneg r5, r30
1621 ; SPE-NEXT: efsneg r3, r3
1622 ; SPE-NEXT: efsneg r6, r21
1623 ; SPE-NEXT: evldd r30, 80(r1) # 8-byte Folded Reload
1624 ; SPE-NEXT: evldd r29, 72(r1) # 8-byte Folded Reload
1625 ; SPE-NEXT: evldd r28, 64(r1) # 8-byte Folded Reload
1626 ; SPE-NEXT: evldd r27, 56(r1) # 8-byte Folded Reload
1627 ; SPE-NEXT: evldd r26, 48(r1) # 8-byte Folded Reload
1628 ; SPE-NEXT: evldd r25, 40(r1) # 8-byte Folded Reload
1629 ; SPE-NEXT: evldd r24, 32(r1) # 8-byte Folded Reload
1630 ; SPE-NEXT: evldd r23, 24(r1) # 8-byte Folded Reload
1631 ; SPE-NEXT: evldd r22, 16(r1) # 8-byte Folded Reload
1632 ; SPE-NEXT: evldd r21, 8(r1) # 8-byte Folded Reload
1633 ; SPE-NEXT: lwz r0, 100(r1)
1634 ; SPE-NEXT: addi r1, r1, 96
1637 %neg = fneg <4 x float> %vf2
1638 %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
1639 <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
1640 metadata !"round.dynamic",
1641 metadata !"fpexcept.strict") #0
1642 %res = fneg <4 x float> %fma
1643 ret <4 x float> %res
1646 define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
1647 ; CHECK-LABEL: fnmsub_v2f64:
1649 ; CHECK-NEXT: xvnmsubadp v4, v2, v3
1650 ; CHECK-NEXT: vmr v2, v4
1653 ; NOVSX-LABEL: fnmsub_v2f64:
1655 ; NOVSX-NEXT: fnmsub f2, f2, f4, f6
1656 ; NOVSX-NEXT: fnmsub f1, f1, f3, f5
1659 ; SPE-LABEL: fnmsub_v2f64:
1662 ; SPE-NEXT: stw r0, 4(r1)
1663 ; SPE-NEXT: stwu r1, -64(r1)
1664 ; SPE-NEXT: .cfi_def_cfa_offset 64
1665 ; SPE-NEXT: .cfi_offset lr, 4
1666 ; SPE-NEXT: .cfi_offset r26, -48
1667 ; SPE-NEXT: .cfi_offset r27, -40
1668 ; SPE-NEXT: .cfi_offset r28, -32
1669 ; SPE-NEXT: .cfi_offset r29, -24
1670 ; SPE-NEXT: .cfi_offset r30, -16
1671 ; SPE-NEXT: evstdd r30, 48(r1) # 8-byte Folded Spill
1672 ; SPE-NEXT: mr r30, r3
1673 ; SPE-NEXT: evldd r3, 80(r1)
1674 ; SPE-NEXT: evldd r11, 88(r1)
1675 ; SPE-NEXT: evstdd r26, 16(r1) # 8-byte Folded Spill
1676 ; SPE-NEXT: evstdd r27, 24(r1) # 8-byte Folded Spill
1677 ; SPE-NEXT: efdneg r27, r11
1678 ; SPE-NEXT: evstdd r28, 32(r1) # 8-byte Folded Spill
1679 ; SPE-NEXT: evstdd r29, 40(r1) # 8-byte Folded Spill
1680 ; SPE-NEXT: evmergelo r29, r7, r8
1681 ; SPE-NEXT: evmergelo r9, r9, r10
1682 ; SPE-NEXT: evmergelo r4, r5, r6
1683 ; SPE-NEXT: efdneg r8, r3
1684 ; SPE-NEXT: evmergehi r3, r4, r4
1685 ; SPE-NEXT: evmergehi r5, r9, r9
1686 ; SPE-NEXT: evmergehi r7, r8, r8
1687 ; SPE-NEXT: mr r6, r9
1688 ; SPE-NEXT: evldd r28, 72(r1)
1689 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1690 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
1691 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
1692 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
1693 ; SPE-NEXT: # kill: def $r8 killed $r8 killed $s8
1695 ; SPE-NEXT: evmergelo r26, r3, r4
1696 ; SPE-NEXT: evmergehi r3, r29, r29
1697 ; SPE-NEXT: evmergehi r5, r28, r28
1698 ; SPE-NEXT: evmergehi r7, r27, r27
1699 ; SPE-NEXT: mr r4, r29
1700 ; SPE-NEXT: mr r6, r28
1701 ; SPE-NEXT: mr r8, r27
1702 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1703 ; SPE-NEXT: # kill: def $r5 killed $r5 killed $s5
1704 ; SPE-NEXT: # kill: def $r7 killed $r7 killed $s7
1706 ; SPE-NEXT: evmergelo r3, r3, r4
1707 ; SPE-NEXT: li r5, 8
1708 ; SPE-NEXT: efdneg r3, r3
1709 ; SPE-NEXT: evstddx r3, r30, r5
1710 ; SPE-NEXT: efdneg r3, r26
1711 ; SPE-NEXT: evstdd r3, 0(r30)
1712 ; SPE-NEXT: evldd r30, 48(r1) # 8-byte Folded Reload
1713 ; SPE-NEXT: evldd r29, 40(r1) # 8-byte Folded Reload
1714 ; SPE-NEXT: evldd r28, 32(r1) # 8-byte Folded Reload
1715 ; SPE-NEXT: evldd r27, 24(r1) # 8-byte Folded Reload
1716 ; SPE-NEXT: evldd r26, 16(r1) # 8-byte Folded Reload
1717 ; SPE-NEXT: lwz r0, 68(r1)
1718 ; SPE-NEXT: addi r1, r1, 64
1721 %neg = fneg <2 x double> %vf2
1722 %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
1723 <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
1724 metadata !"round.dynamic",
1725 metadata !"fpexcept.strict") #0
1726 %res = fneg <2 x double> %fma
1727 ret <2 x double> %res
1730 define float @fsqrt_f32(float %f1) #0 {
1731 ; CHECK-LABEL: fsqrt_f32:
1733 ; CHECK-NEXT: xssqrtsp f1, f1
1736 ; NOVSX-LABEL: fsqrt_f32:
1738 ; NOVSX-NEXT: fsqrts f1, f1
1741 ; SPE-LABEL: fsqrt_f32:
1744 ; SPE-NEXT: stw r0, 4(r1)
1745 ; SPE-NEXT: stwu r1, -16(r1)
1746 ; SPE-NEXT: .cfi_def_cfa_offset 16
1747 ; SPE-NEXT: .cfi_offset lr, 4
1748 ; SPE-NEXT: bl sqrtf
1749 ; SPE-NEXT: lwz r0, 20(r1)
1750 ; SPE-NEXT: addi r1, r1, 16
1753 %res = call float @llvm.experimental.constrained.sqrt.f32(
1755 metadata !"round.dynamic",
1756 metadata !"fpexcept.strict") #0
1760 define double @fsqrt_f64(double %f1) #0 {
1761 ; CHECK-LABEL: fsqrt_f64:
1763 ; CHECK-NEXT: xssqrtdp f1, f1
1766 ; NOVSX-LABEL: fsqrt_f64:
1768 ; NOVSX-NEXT: fsqrt f1, f1
1771 ; SPE-LABEL: fsqrt_f64:
1774 ; SPE-NEXT: stw r0, 4(r1)
1775 ; SPE-NEXT: stwu r1, -16(r1)
1776 ; SPE-NEXT: .cfi_def_cfa_offset 16
1777 ; SPE-NEXT: .cfi_offset lr, 4
1778 ; SPE-NEXT: evmergelo r4, r3, r4
1779 ; SPE-NEXT: evmergehi r3, r4, r4
1780 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
1781 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1783 ; SPE-NEXT: evmergelo r4, r3, r4
1784 ; SPE-NEXT: evmergehi r3, r4, r4
1785 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
1786 ; SPE-NEXT: lwz r0, 20(r1)
1787 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1788 ; SPE-NEXT: addi r1, r1, 16
1791 %res = call double @llvm.experimental.constrained.sqrt.f64(
1793 metadata !"round.dynamic",
1794 metadata !"fpexcept.strict") #0
1798 define <4 x float> @fsqrt_v4f32(<4 x float> %vf1) #0 {
1799 ; CHECK-LABEL: fsqrt_v4f32:
1801 ; CHECK-NEXT: xvsqrtsp v2, v2
1804 ; NOVSX-LABEL: fsqrt_v4f32:
1806 ; NOVSX-NEXT: addi r3, r1, -32
1807 ; NOVSX-NEXT: stvx v2, 0, r3
1808 ; NOVSX-NEXT: addi r3, r1, -16
1809 ; NOVSX-NEXT: lfs f0, -20(r1)
1810 ; NOVSX-NEXT: fsqrts f0, f0
1811 ; NOVSX-NEXT: stfs f0, -4(r1)
1812 ; NOVSX-NEXT: lfs f0, -24(r1)
1813 ; NOVSX-NEXT: fsqrts f0, f0
1814 ; NOVSX-NEXT: stfs f0, -8(r1)
1815 ; NOVSX-NEXT: lfs f0, -28(r1)
1816 ; NOVSX-NEXT: fsqrts f0, f0
1817 ; NOVSX-NEXT: stfs f0, -12(r1)
1818 ; NOVSX-NEXT: lfs f0, -32(r1)
1819 ; NOVSX-NEXT: fsqrts f0, f0
1820 ; NOVSX-NEXT: stfs f0, -16(r1)
1821 ; NOVSX-NEXT: lvx v2, 0, r3
1824 ; SPE-LABEL: fsqrt_v4f32:
1827 ; SPE-NEXT: stw r0, 4(r1)
1828 ; SPE-NEXT: stwu r1, -48(r1)
1829 ; SPE-NEXT: .cfi_def_cfa_offset 48
1830 ; SPE-NEXT: .cfi_offset lr, 4
1831 ; SPE-NEXT: .cfi_offset r27, -40
1832 ; SPE-NEXT: .cfi_offset r28, -32
1833 ; SPE-NEXT: .cfi_offset r29, -24
1834 ; SPE-NEXT: .cfi_offset r30, -16
1835 ; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill
1836 ; SPE-NEXT: mr r28, r3
1837 ; SPE-NEXT: mr r3, r6
1838 ; SPE-NEXT: evstdd r27, 8(r1) # 8-byte Folded Spill
1839 ; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill
1840 ; SPE-NEXT: mr r29, r4
1841 ; SPE-NEXT: evstdd r30, 32(r1) # 8-byte Folded Spill
1842 ; SPE-NEXT: mr r30, r5
1843 ; SPE-NEXT: bl sqrtf
1844 ; SPE-NEXT: mr r27, r3
1845 ; SPE-NEXT: mr r3, r30
1846 ; SPE-NEXT: bl sqrtf
1847 ; SPE-NEXT: mr r30, r3
1848 ; SPE-NEXT: mr r3, r29
1849 ; SPE-NEXT: bl sqrtf
1850 ; SPE-NEXT: mr r29, r3
1851 ; SPE-NEXT: mr r3, r28
1852 ; SPE-NEXT: bl sqrtf
1853 ; SPE-NEXT: mr r4, r29
1854 ; SPE-NEXT: mr r5, r30
1855 ; SPE-NEXT: mr r6, r27
1856 ; SPE-NEXT: evldd r30, 32(r1) # 8-byte Folded Reload
1857 ; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload
1858 ; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload
1859 ; SPE-NEXT: evldd r27, 8(r1) # 8-byte Folded Reload
1860 ; SPE-NEXT: lwz r0, 52(r1)
1861 ; SPE-NEXT: addi r1, r1, 48
1864 %res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
1866 metadata !"round.dynamic",
1867 metadata !"fpexcept.strict") #0
1868 ret <4 x float> %res
1871 define <2 x double> @fsqrt_v2f64(<2 x double> %vf1) #0 {
1872 ; CHECK-LABEL: fsqrt_v2f64:
1874 ; CHECK-NEXT: xvsqrtdp v2, v2
1877 ; NOVSX-LABEL: fsqrt_v2f64:
1879 ; NOVSX-NEXT: fsqrt f2, f2
1880 ; NOVSX-NEXT: fsqrt f1, f1
1883 ; SPE-LABEL: fsqrt_v2f64:
1886 ; SPE-NEXT: stw r0, 4(r1)
1887 ; SPE-NEXT: stwu r1, -48(r1)
1888 ; SPE-NEXT: .cfi_def_cfa_offset 48
1889 ; SPE-NEXT: .cfi_offset lr, 4
1890 ; SPE-NEXT: .cfi_offset r28, -32
1891 ; SPE-NEXT: .cfi_offset r29, -24
1892 ; SPE-NEXT: .cfi_offset r30, -16
1893 ; SPE-NEXT: evstdd r28, 16(r1) # 8-byte Folded Spill
1894 ; SPE-NEXT: evstdd r29, 24(r1) # 8-byte Folded Spill
1895 ; SPE-NEXT: evstdd r30, 32(r1) # 8-byte Folded Spill
1896 ; SPE-NEXT: evmergelo r29, r7, r8
1897 ; SPE-NEXT: evmergelo r4, r5, r6
1898 ; SPE-NEXT: mr r30, r3
1899 ; SPE-NEXT: evmergehi r3, r4, r4
1900 ; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4
1901 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1903 ; SPE-NEXT: evmergelo r28, r3, r4
1904 ; SPE-NEXT: evmergehi r3, r29, r29
1905 ; SPE-NEXT: mr r4, r29
1906 ; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3
1908 ; SPE-NEXT: li r5, 8
1909 ; SPE-NEXT: evmergelo r3, r3, r4
1910 ; SPE-NEXT: evstddx r3, r30, r5
1911 ; SPE-NEXT: evstdd r28, 0(r30)
1912 ; SPE-NEXT: evldd r30, 32(r1) # 8-byte Folded Reload
1913 ; SPE-NEXT: evldd r29, 24(r1) # 8-byte Folded Reload
1914 ; SPE-NEXT: evldd r28, 16(r1) # 8-byte Folded Reload
1915 ; SPE-NEXT: lwz r0, 52(r1)
1916 ; SPE-NEXT: addi r1, r1, 48
1919 %res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
1921 metadata !"round.dynamic",
1922 metadata !"fpexcept.strict") #0
1923 ret <2 x double> %res
1926 attributes #0 = { strictfp }