1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA --check-prefix=FMA-INFS
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-INFS
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-INFS
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512-INFS
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA --check-prefix=FMA-NOINFS
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512-NOINFS
12 ; Pattern: (fadd (fmul x, y), z) -> (fmadd x, y, z)
; Scalar float case of %a0 * %a1 + %a2 written as separate fmul and fadd. The checks expect vfmadd213ss under the FMA and AVX512 prefixes and vfmaddss under the FMA4 prefix.
15 define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
16 ; FMA-LABEL: test_f32_fmadd:
18 ; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
21 ; FMA4-LABEL: test_f32_fmadd:
23 ; FMA4-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
26 ; AVX512-LABEL: test_f32_fmadd:
28 ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
30 %x = fmul float %a0, %a1
31 %res = fadd float %x, %a2
; <4 x float> case of %a0 * %a1 + %a2 (fmul then fadd). The checks expect a 128-bit vfmadd213ps (FMA and AVX512 prefixes) or vfmaddps (FMA4 prefix) on xmm registers.
35 define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
36 ; FMA-LABEL: test_4f32_fmadd:
38 ; FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
41 ; FMA4-LABEL: test_4f32_fmadd:
43 ; FMA4-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
46 ; AVX512-LABEL: test_4f32_fmadd:
48 ; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
50 %x = fmul <4 x float> %a0, %a1
51 %res = fadd <4 x float> %x, %a2
; <8 x float> case of %a0 * %a1 + %a2 (fmul then fadd). The checks expect a 256-bit vfmadd213ps (FMA and AVX512 prefixes) or vfmaddps (FMA4 prefix) on ymm registers.
55 define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
56 ; FMA-LABEL: test_8f32_fmadd:
58 ; FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
61 ; FMA4-LABEL: test_8f32_fmadd:
63 ; FMA4-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
66 ; AVX512-LABEL: test_8f32_fmadd:
68 ; AVX512-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
70 %x = fmul <8 x float> %a0, %a1
71 %res = fadd <8 x float> %x, %a2
; Scalar double case of %a0 * %a1 + %a2 (fmul then fadd). The checks expect vfmadd213sd under the FMA and AVX512 prefixes and vfmaddsd under the FMA4 prefix.
75 define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
76 ; FMA-LABEL: test_f64_fmadd:
78 ; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
81 ; FMA4-LABEL: test_f64_fmadd:
83 ; FMA4-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
86 ; AVX512-LABEL: test_f64_fmadd:
88 ; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
90 %x = fmul double %a0, %a1
91 %res = fadd double %x, %a2
; <2 x double> case of %a0 * %a1 + %a2 (fmul then fadd). The checks expect a 128-bit vfmadd213pd (FMA and AVX512 prefixes) or vfmaddpd (FMA4 prefix) on xmm registers.
95 define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
96 ; FMA-LABEL: test_2f64_fmadd:
98 ; FMA-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
101 ; FMA4-LABEL: test_2f64_fmadd:
103 ; FMA4-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
106 ; AVX512-LABEL: test_2f64_fmadd:
108 ; AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
110 %x = fmul <2 x double> %a0, %a1
111 %res = fadd <2 x double> %x, %a2
112 ret <2 x double> %res
; <4 x double> case of %a0 * %a1 + %a2 (fmul then fadd). The checks expect a 256-bit vfmadd213pd (FMA and AVX512 prefixes) or vfmaddpd (FMA4 prefix) on ymm registers.
115 define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
116 ; FMA-LABEL: test_4f64_fmadd:
118 ; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
121 ; FMA4-LABEL: test_4f64_fmadd:
123 ; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
126 ; AVX512-LABEL: test_4f64_fmadd:
128 ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
130 %x = fmul <4 x double> %a0, %a1
131 %res = fadd <4 x double> %x, %a2
132 ret <4 x double> %res
136 ; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
; Scalar float case of %a0 * %a1 - %a2 written as separate fmul and fsub. The checks expect vfmsub213ss under the FMA and AVX512 prefixes and vfmsubss under the FMA4 prefix.
139 define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
140 ; FMA-LABEL: test_f32_fmsub:
142 ; FMA-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
145 ; FMA4-LABEL: test_f32_fmsub:
147 ; FMA4-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
150 ; AVX512-LABEL: test_f32_fmsub:
152 ; AVX512-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
154 %x = fmul float %a0, %a1
155 %res = fsub float %x, %a2
; <4 x float> case of %a0 * %a1 - %a2 (fmul then fsub). The checks expect a 128-bit vfmsub213ps (FMA and AVX512 prefixes) or vfmsubps (FMA4 prefix) on xmm registers.
159 define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
160 ; FMA-LABEL: test_4f32_fmsub:
162 ; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
165 ; FMA4-LABEL: test_4f32_fmsub:
167 ; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
170 ; AVX512-LABEL: test_4f32_fmsub:
172 ; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
174 %x = fmul <4 x float> %a0, %a1
175 %res = fsub <4 x float> %x, %a2
; <8 x float> case of %a0 * %a1 - %a2 (fmul then fsub). The checks expect a 256-bit vfmsub213ps (FMA and AVX512 prefixes) or vfmsubps (FMA4 prefix) on ymm registers.
179 define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
180 ; FMA-LABEL: test_8f32_fmsub:
182 ; FMA-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
185 ; FMA4-LABEL: test_8f32_fmsub:
187 ; FMA4-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
190 ; AVX512-LABEL: test_8f32_fmsub:
192 ; AVX512-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
194 %x = fmul <8 x float> %a0, %a1
195 %res = fsub <8 x float> %x, %a2
; Scalar double case of %a0 * %a1 - %a2 (fmul then fsub). The checks expect vfmsub213sd under the FMA and AVX512 prefixes and vfmsubsd under the FMA4 prefix.
199 define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
200 ; FMA-LABEL: test_f64_fmsub:
202 ; FMA-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
205 ; FMA4-LABEL: test_f64_fmsub:
207 ; FMA4-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
210 ; AVX512-LABEL: test_f64_fmsub:
212 ; AVX512-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
214 %x = fmul double %a0, %a1
215 %res = fsub double %x, %a2
; <2 x double> case of %a0 * %a1 - %a2 (fmul then fsub). The checks expect a 128-bit vfmsub213pd (FMA and AVX512 prefixes) or vfmsubpd (FMA4 prefix) on xmm registers.
219 define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
220 ; FMA-LABEL: test_2f64_fmsub:
222 ; FMA-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
225 ; FMA4-LABEL: test_2f64_fmsub:
227 ; FMA4-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
230 ; AVX512-LABEL: test_2f64_fmsub:
232 ; AVX512-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
234 %x = fmul <2 x double> %a0, %a1
235 %res = fsub <2 x double> %x, %a2
236 ret <2 x double> %res
; <4 x double> case of %a0 * %a1 - %a2 (fmul then fsub). The checks expect a 256-bit vfmsub213pd (FMA and AVX512 prefixes) or vfmsubpd (FMA4 prefix) on ymm registers.
239 define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
240 ; FMA-LABEL: test_4f64_fmsub:
242 ; FMA-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
245 ; FMA4-LABEL: test_4f64_fmsub:
247 ; FMA4-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
250 ; AVX512-LABEL: test_4f64_fmsub:
252 ; AVX512-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
254 %x = fmul <4 x double> %a0, %a1
255 %res = fsub <4 x double> %x, %a2
256 ret <4 x double> %res
260 ; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
; Scalar float case of %a2 - (%a0 * %a1), with the product as the subtrahend. The checks expect vfnmadd213ss under the FMA and AVX512 prefixes and vfnmaddss under the FMA4 prefix.
263 define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
264 ; FMA-LABEL: test_f32_fnmadd:
266 ; FMA-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
269 ; FMA4-LABEL: test_f32_fnmadd:
271 ; FMA4-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
274 ; AVX512-LABEL: test_f32_fnmadd:
276 ; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
278 %x = fmul float %a0, %a1
279 %res = fsub float %a2, %x
; <4 x float> case of %a2 - (%a0 * %a1). The checks expect a 128-bit vfnmadd213ps (FMA and AVX512 prefixes) or vfnmaddps (FMA4 prefix) on xmm registers.
283 define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
284 ; FMA-LABEL: test_4f32_fnmadd:
286 ; FMA-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
289 ; FMA4-LABEL: test_4f32_fnmadd:
291 ; FMA4-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
294 ; AVX512-LABEL: test_4f32_fnmadd:
296 ; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
298 %x = fmul <4 x float> %a0, %a1
299 %res = fsub <4 x float> %a2, %x
; <8 x float> case of %a2 - (%a0 * %a1). The checks expect a 256-bit vfnmadd213ps (FMA and AVX512 prefixes) or vfnmaddps (FMA4 prefix) on ymm registers.
303 define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
304 ; FMA-LABEL: test_8f32_fnmadd:
306 ; FMA-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
309 ; FMA4-LABEL: test_8f32_fnmadd:
311 ; FMA4-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
314 ; AVX512-LABEL: test_8f32_fnmadd:
316 ; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
318 %x = fmul <8 x float> %a0, %a1
319 %res = fsub <8 x float> %a2, %x
; Scalar double case of %a2 - (%a0 * %a1). The checks expect vfnmadd213sd under the FMA and AVX512 prefixes and vfnmaddsd under the FMA4 prefix.
323 define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
324 ; FMA-LABEL: test_f64_fnmadd:
326 ; FMA-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
329 ; FMA4-LABEL: test_f64_fnmadd:
331 ; FMA4-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
334 ; AVX512-LABEL: test_f64_fnmadd:
336 ; AVX512-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
338 %x = fmul double %a0, %a1
339 %res = fsub double %a2, %x
; <2 x double> case of %a2 - (%a0 * %a1). The checks expect a 128-bit vfnmadd213pd (FMA and AVX512 prefixes) or vfnmaddpd (FMA4 prefix) on xmm registers.
343 define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
344 ; FMA-LABEL: test_2f64_fnmadd:
346 ; FMA-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
349 ; FMA4-LABEL: test_2f64_fnmadd:
351 ; FMA4-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
354 ; AVX512-LABEL: test_2f64_fnmadd:
356 ; AVX512-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
358 %x = fmul <2 x double> %a0, %a1
359 %res = fsub <2 x double> %a2, %x
360 ret <2 x double> %res
; <4 x double> case of %a2 - (%a0 * %a1). The checks expect a 256-bit vfnmadd213pd (FMA and AVX512 prefixes) or vfnmaddpd (FMA4 prefix) on ymm registers.
363 define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
364 ; FMA-LABEL: test_4f64_fnmadd:
366 ; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
369 ; FMA4-LABEL: test_4f64_fnmadd:
371 ; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
374 ; AVX512-LABEL: test_4f64_fnmadd:
376 ; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
378 %x = fmul <4 x double> %a0, %a1
379 %res = fsub <4 x double> %a2, %x
380 ret <4 x double> %res
384 ; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
; Scalar float case of -(%a0 * %a1) - %a2, where the negation is spelled as fsub -0.0, %x. The checks expect vfnmsub213ss under the FMA and AVX512 prefixes and vfnmsubss under the FMA4 prefix.
387 define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
388 ; FMA-LABEL: test_f32_fnmsub:
390 ; FMA-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
393 ; FMA4-LABEL: test_f32_fnmsub:
395 ; FMA4-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
398 ; AVX512-LABEL: test_f32_fnmsub:
400 ; AVX512-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
402 %x = fmul float %a0, %a1
403 %y = fsub float -0.000000e+00, %x
404 %res = fsub float %y, %a2
; <4 x float> case of -(%a0 * %a1) - %a2, where the negation is spelled as fsub from a -0.0 splat. The checks expect a 128-bit vfnmsub213ps (FMA and AVX512 prefixes) or vfnmsubps (FMA4 prefix).
408 define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
409 ; FMA-LABEL: test_4f32_fnmsub:
411 ; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
414 ; FMA4-LABEL: test_4f32_fnmsub:
416 ; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
419 ; AVX512-LABEL: test_4f32_fnmsub:
421 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
423 %x = fmul <4 x float> %a0, %a1
424 %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
425 %res = fsub <4 x float> %y, %a2
; <8 x float> case of -(%a0 * %a1) - %a2, where the negation is spelled as fsub from a -0.0 splat. The checks expect a 256-bit vfnmsub213ps (FMA and AVX512 prefixes) or vfnmsubps (FMA4 prefix).
429 define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
430 ; FMA-LABEL: test_8f32_fnmsub:
432 ; FMA-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
435 ; FMA4-LABEL: test_8f32_fnmsub:
437 ; FMA4-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
440 ; AVX512-LABEL: test_8f32_fnmsub:
442 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
444 %x = fmul <8 x float> %a0, %a1
445 %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
446 %res = fsub <8 x float> %y, %a2
; Scalar double case of -(%a0 * %a1) - %a2, where the negation is spelled as fsub -0.0, %x. The checks expect vfnmsub213sd under the FMA and AVX512 prefixes and vfnmsubsd under the FMA4 prefix.
450 define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
451 ; FMA-LABEL: test_f64_fnmsub:
453 ; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
456 ; FMA4-LABEL: test_f64_fnmsub:
458 ; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
461 ; AVX512-LABEL: test_f64_fnmsub:
463 ; AVX512-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
465 %x = fmul double %a0, %a1
466 %y = fsub double -0.000000e+00, %x
467 %res = fsub double %y, %a2
; <2 x double> case of -(%a0 * %a1) - %a2, where the negation is spelled as fsub from a -0.0 splat. The checks expect a 128-bit vfnmsub213pd (FMA and AVX512 prefixes) or vfnmsubpd (FMA4 prefix).
471 define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
472 ; FMA-LABEL: test_2f64_fnmsub:
474 ; FMA-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
477 ; FMA4-LABEL: test_2f64_fnmsub:
479 ; FMA4-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
482 ; AVX512-LABEL: test_2f64_fnmsub:
484 ; AVX512-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
486 %x = fmul <2 x double> %a0, %a1
487 %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x
488 %res = fsub <2 x double> %y, %a2
489 ret <2 x double> %res
; <4 x double> case of -(%a0 * %a1) - %a2, where the negation is spelled as fsub from a -0.0 splat. The checks expect a 256-bit vfnmsub213pd (FMA and AVX512 prefixes) or vfnmsubpd (FMA4 prefix).
492 define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
493 ; FMA-LABEL: test_4f64_fnmsub:
495 ; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
498 ; FMA4-LABEL: test_4f64_fnmsub:
500 ; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
503 ; AVX512-LABEL: test_4f64_fnmsub:
505 ; AVX512-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
507 %x = fmul <4 x double> %a0, %a1
508 %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
509 %res = fsub <4 x double> %y, %a2
510 ret <4 x double> %res
514 ; Load folding patterns: the vector load feeding the fmul should fold into the FMA's memory operand.
; Loads a <4 x float> through %a0, multiplies it by %a1 and adds %a2. The checks expect the load to appear as the memory operand of the FMA (vfmadd132ps with mem, or vfmaddps reading (%rdi) under the FMA4 prefix).
517 define <4 x float> @test_4f32_fmadd_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
518 ; FMA-LABEL: test_4f32_fmadd_load:
520 ; FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
523 ; FMA4-LABEL: test_4f32_fmadd_load:
525 ; FMA4-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
528 ; AVX512-LABEL: test_4f32_fmadd_load:
530 ; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
532 %x = load <4 x float>, <4 x float>* %a0
533 %y = fmul <4 x float> %x, %a1
534 %res = fadd <4 x float> %y, %a2
; Loads a <2 x double> through %a0, multiplies it by %a1 and subtracts %a2. The checks expect the load to appear as the memory operand of the FMA (vfmsub132pd with mem, or vfmsubpd reading (%rdi) under the FMA4 prefix).
538 define <2 x double> @test_2f64_fmsub_load(<2 x double>* %a0, <2 x double> %a1, <2 x double> %a2) {
539 ; FMA-LABEL: test_2f64_fmsub_load:
541 ; FMA-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
544 ; FMA4-LABEL: test_2f64_fmsub_load:
546 ; FMA4-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
549 ; AVX512-LABEL: test_2f64_fmsub_load:
551 ; AVX512-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
553 %x = load <2 x double>, <2 x double>* %a0
554 %y = fmul <2 x double> %x, %a1
555 %res = fsub <2 x double> %y, %a2
556 ret <2 x double> %res
560 ; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y). Folded to an FMA only under -enable-no-infs-fp-math (the *-NOINFS prefixes).
; (x + 1.0) * y. Under the *-INFS prefixes the checks keep a vaddps followed by a vmulps. Under the *-NOINFS prefixes (runs with -enable-no-infs-fp-math) they expect one FMA computing (y * x) + y.
563 define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
564 ; FMA-INFS-LABEL: test_v4f32_mul_add_x_one_y:
566 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
567 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
568 ; FMA-INFS-NEXT: retq
570 ; FMA4-INFS-LABEL: test_v4f32_mul_add_x_one_y:
571 ; FMA4-INFS: # %bb.0:
572 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
573 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
574 ; FMA4-INFS-NEXT: retq
576 ; AVX512-INFS-LABEL: test_v4f32_mul_add_x_one_y:
577 ; AVX512-INFS: # %bb.0:
578 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
579 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
580 ; AVX512-INFS-NEXT: retq
582 ; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
583 ; FMA-NOINFS: # %bb.0:
584 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
585 ; FMA-NOINFS-NEXT: retq
587 ; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
588 ; FMA4-NOINFS: # %bb.0:
589 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
590 ; FMA4-NOINFS-NEXT: retq
592 ; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
593 ; AVX512-NOINFS: # %bb.0:
594 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
595 ; AVX512-NOINFS-NEXT: retq
596 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
597 %m = fmul <4 x float> %a, %y
; y * (x + 1.0), i.e. the fmul operands are commuted relative to (x + 1.0) * y. Under the *-INFS prefixes the checks keep vaddps then vmulps. Under the *-NOINFS prefixes they expect one FMA computing (y * x) + y.
601 define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
602 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one:
604 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
605 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
606 ; FMA-INFS-NEXT: retq
608 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one:
609 ; FMA4-INFS: # %bb.0:
610 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
611 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
612 ; FMA4-INFS-NEXT: retq
614 ; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one:
615 ; AVX512-INFS: # %bb.0:
616 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
617 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
618 ; AVX512-INFS-NEXT: retq
620 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
621 ; FMA-NOINFS: # %bb.0:
622 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
623 ; FMA-NOINFS-NEXT: retq
625 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
626 ; FMA4-NOINFS: # %bb.0:
627 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
628 ; FMA4-NOINFS-NEXT: retq
630 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
631 ; AVX512-NOINFS: # %bb.0:
632 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
633 ; AVX512-NOINFS-NEXT: retq
634 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
635 %m = fmul <4 x float> %y, %a
; y * (x + <1.0, undef, 1.0, undef>), so the added constant has undef in lanes 1 and 3. Under the *-INFS prefixes the checks keep vaddps then vmulps. Under the *-NOINFS prefixes they still expect one FMA computing (y * x) + y.
639 define <4 x float> @test_v4f32_mul_y_add_x_one_undefs(<4 x float> %x, <4 x float> %y) {
640 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
642 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
643 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
644 ; FMA-INFS-NEXT: retq
646 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
647 ; FMA4-INFS: # %bb.0:
648 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
649 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
650 ; FMA4-INFS-NEXT: retq
652 ; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
653 ; AVX512-INFS: # %bb.0:
654 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
655 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
656 ; AVX512-INFS-NEXT: retq
658 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
659 ; FMA-NOINFS: # %bb.0:
660 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
661 ; FMA-NOINFS-NEXT: retq
663 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
664 ; FMA4-NOINFS: # %bb.0:
665 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
666 ; FMA4-NOINFS-NEXT: retq
668 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
669 ; AVX512-NOINFS: # %bb.0:
670 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
671 ; AVX512-NOINFS-NEXT: retq
672 %a = fadd <4 x float> %x, <float 1.0, float undef, float 1.0, float undef>
673 %m = fmul <4 x float> %y, %a
; (x + -1.0) * y. Under the *-INFS prefixes the checks keep vaddps then vmulps. Under the *-NOINFS prefixes they expect one FMA computing (y * x) - y.
677 define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
678 ; FMA-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
680 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
681 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
682 ; FMA-INFS-NEXT: retq
684 ; FMA4-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
685 ; FMA4-INFS: # %bb.0:
686 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
687 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
688 ; FMA4-INFS-NEXT: retq
690 ; AVX512-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
691 ; AVX512-INFS: # %bb.0:
692 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
693 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
694 ; AVX512-INFS-NEXT: retq
696 ; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
697 ; FMA-NOINFS: # %bb.0:
698 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
699 ; FMA-NOINFS-NEXT: retq
701 ; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
702 ; FMA4-NOINFS: # %bb.0:
703 ; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
704 ; FMA4-NOINFS-NEXT: retq
706 ; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
707 ; AVX512-NOINFS: # %bb.0:
708 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
709 ; AVX512-NOINFS-NEXT: retq
710 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
711 %m = fmul <4 x float> %a, %y
; y * (x + -1.0), i.e. the fmul operands are commuted relative to (x + -1.0) * y. Under the *-INFS prefixes the checks keep vaddps then vmulps. Under the *-NOINFS prefixes they expect one FMA computing (y * x) - y.
715 define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
716 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
718 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
719 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
720 ; FMA-INFS-NEXT: retq
722 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
723 ; FMA4-INFS: # %bb.0:
724 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
725 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
726 ; FMA4-INFS-NEXT: retq
728 ; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
729 ; AVX512-INFS: # %bb.0:
730 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
731 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
732 ; AVX512-INFS-NEXT: retq
734 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
735 ; FMA-NOINFS: # %bb.0:
736 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
737 ; FMA-NOINFS-NEXT: retq
739 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
740 ; FMA4-NOINFS: # %bb.0:
741 ; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
742 ; FMA4-NOINFS-NEXT: retq
744 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
745 ; AVX512-NOINFS: # %bb.0:
746 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
747 ; AVX512-NOINFS-NEXT: retq
748 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
749 %m = fmul <4 x float> %y, %a
; y * (x + <undef, -1.0, undef, -1.0>), so the added constant has undef in lanes 0 and 2. Under the *-INFS prefixes the checks keep vaddps then vmulps. Under the *-NOINFS prefixes they still expect one FMA computing (y * x) - y.
753 define <4 x float> @test_v4f32_mul_y_add_x_negone_undefs(<4 x float> %x, <4 x float> %y) {
754 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
756 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
757 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
758 ; FMA-INFS-NEXT: retq
760 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
761 ; FMA4-INFS: # %bb.0:
762 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
763 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
764 ; FMA4-INFS-NEXT: retq
766 ; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
767 ; AVX512-INFS: # %bb.0:
768 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
769 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
770 ; AVX512-INFS-NEXT: retq
772 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
773 ; FMA-NOINFS: # %bb.0:
774 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
775 ; FMA-NOINFS-NEXT: retq
777 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
778 ; FMA4-NOINFS: # %bb.0:
779 ; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
780 ; FMA4-NOINFS-NEXT: retq
782 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
783 ; AVX512-NOINFS: # %bb.0:
784 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
785 ; AVX512-NOINFS-NEXT: retq
786 %a = fadd <4 x float> %x, <float undef, float -1.0, float undef, float -1.0>
787 %m = fmul <4 x float> %y, %a
; (1.0 - x) * y. Under the *-INFS prefixes the checks materialize the 1.0 splat (vmovaps, or vbroadcastss on AVX512), then vsubps and vmulps. Under the *-NOINFS prefixes they expect one FMA computing -(y * x) + y.
791 define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
792 ; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
794 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
795 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
796 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
797 ; FMA-INFS-NEXT: retq
799 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
800 ; FMA4-INFS: # %bb.0:
801 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
802 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
803 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
804 ; FMA4-INFS-NEXT: retq
806 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
807 ; AVX512-INFS: # %bb.0:
808 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
809 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
810 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
811 ; AVX512-INFS-NEXT: retq
813 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
814 ; FMA-NOINFS: # %bb.0:
815 ; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
816 ; FMA-NOINFS-NEXT: retq
818 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
819 ; FMA4-NOINFS: # %bb.0:
820 ; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
821 ; FMA4-NOINFS-NEXT: retq
823 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
824 ; AVX512-NOINFS: # %bb.0:
825 ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
826 ; AVX512-NOINFS-NEXT: retq
827 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
828 %m = fmul <4 x float> %s, %y
; y * (1.0 - x), i.e. the fmul operands are commuted relative to (1.0 - x) * y. Under the *-INFS prefixes the checks materialize the 1.0 splat, then vsubps and vmulps. Under the *-NOINFS prefixes they expect one FMA computing -(y * x) + y.
832 define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
833 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
835 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
836 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
837 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
838 ; FMA-INFS-NEXT: retq
840 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
841 ; FMA4-INFS: # %bb.0:
842 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
843 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
844 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
845 ; FMA4-INFS-NEXT: retq
847 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
848 ; AVX512-INFS: # %bb.0:
849 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
850 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
851 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
852 ; AVX512-INFS-NEXT: retq
854 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
855 ; FMA-NOINFS: # %bb.0:
856 ; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
857 ; FMA-NOINFS-NEXT: retq
859 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
860 ; FMA4-NOINFS: # %bb.0:
861 ; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
862 ; FMA4-NOINFS-NEXT: retq
864 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
865 ; AVX512-NOINFS: # %bb.0:
866 ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
867 ; AVX512-NOINFS-NEXT: retq
868 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
869 %m = fmul <4 x float> %y, %s
; y * (<1.0, undef, 1.0, 1.0> - x), so the constant has undef in lane 1. Under the *-INFS prefixes the checks load the constant (shown with a u lane for FMA/FMA4, as a full broadcast for AVX512), then vsubps and vmulps. Under the *-NOINFS prefixes they still expect one FMA computing -(y * x) + y.
873 define <4 x float> @test_v4f32_mul_y_sub_one_x_undefs(<4 x float> %x, <4 x float> %y) {
874 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
876 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <1.0E+0,u,1.0E+0,1.0E+0>
877 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
878 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
879 ; FMA-INFS-NEXT: retq
881 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
882 ; FMA4-INFS: # %bb.0:
883 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <1.0E+0,u,1.0E+0,1.0E+0>
884 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
885 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
886 ; FMA4-INFS-NEXT: retq
888 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
889 ; AVX512-INFS: # %bb.0:
890 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
891 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
892 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
893 ; AVX512-INFS-NEXT: retq
895 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
896 ; FMA-NOINFS: # %bb.0:
897 ; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
898 ; FMA-NOINFS-NEXT: retq
900 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
901 ; FMA4-NOINFS: # %bb.0:
902 ; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm0, %xmm0
903 ; FMA4-NOINFS-NEXT: retq
905 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
906 ; AVX512-NOINFS: # %bb.0:
907 ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
908 ; AVX512-NOINFS-NEXT: retq
909 %s = fsub <4 x float> <float 1.0, float undef, float 1.0, float 1.0>, %x
910 %m = fmul <4 x float> %y, %s
; (-1.0 - x) * y. Under the *-INFS prefixes the checks materialize the -1.0 splat (vmovaps, or vbroadcastss on AVX512), then vsubps and vmulps. Under the *-NOINFS prefixes they expect one FMA computing -(y * x) - y.
914 define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
915 ; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
917 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
918 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
919 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
920 ; FMA-INFS-NEXT: retq
922 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
923 ; FMA4-INFS: # %bb.0:
924 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
925 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
926 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
927 ; FMA4-INFS-NEXT: retq
929 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
930 ; AVX512-INFS: # %bb.0:
931 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
932 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
933 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
934 ; AVX512-INFS-NEXT: retq
936 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
937 ; FMA-NOINFS: # %bb.0:
938 ; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
939 ; FMA-NOINFS-NEXT: retq
941 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
942 ; FMA4-NOINFS: # %bb.0:
943 ; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
944 ; FMA4-NOINFS-NEXT: retq
946 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
947 ; AVX512-NOINFS: # %bb.0:
948 ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
949 ; AVX512-NOINFS-NEXT: retq
950 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
951 %m = fmul <4 x float> %s, %y
; y * (-1.0 - x), i.e. the fmul operands are commuted relative to (-1.0 - x) * y. Under the *-INFS prefixes the checks materialize the -1.0 splat, then vsubps and vmulps. Under the *-NOINFS prefixes they expect one FMA computing -(y * x) - y.
955 define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
956 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
958 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
959 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
960 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
961 ; FMA-INFS-NEXT: retq
963 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
964 ; FMA4-INFS: # %bb.0:
965 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
966 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
967 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
968 ; FMA4-INFS-NEXT: retq
970 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
971 ; AVX512-INFS: # %bb.0:
972 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
973 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
974 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
975 ; AVX512-INFS-NEXT: retq
977 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
978 ; FMA-NOINFS: # %bb.0:
979 ; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
980 ; FMA-NOINFS-NEXT: retq
982 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
983 ; FMA4-NOINFS: # %bb.0:
984 ; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
985 ; FMA4-NOINFS-NEXT: retq
987 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
988 ; AVX512-NOINFS: # %bb.0:
989 ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
990 ; AVX512-NOINFS-NEXT: retq
991 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
992 %m = fmul <4 x float> %y, %s
; y * (<-1.0, -1.0, undef, -1.0> - x), so the constant has undef in lane 2. Under the *-INFS prefixes the checks load the constant (shown with a u lane for FMA/FMA4, as a full broadcast for AVX512), then vsubps and vmulps. Under the *-NOINFS prefixes they still expect one FMA computing -(y * x) - y.
996 define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x float> %y) {
997 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
999 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <-1.0E+0,-1.0E+0,u,-1.0E+0>
1000 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
1001 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1002 ; FMA-INFS-NEXT: retq
1004 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
1005 ; FMA4-INFS: # %bb.0:
1006 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <-1.0E+0,-1.0E+0,u,-1.0E+0>
1007 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
1008 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1009 ; FMA4-INFS-NEXT: retq
1011 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
1012 ; AVX512-INFS: # %bb.0:
1013 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
1014 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
1015 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1016 ; AVX512-INFS-NEXT: retq
1018 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
1019 ; FMA-NOINFS: # %bb.0:
1020 ; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
1021 ; FMA-NOINFS-NEXT: retq
1023 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
1024 ; FMA4-NOINFS: # %bb.0:
1025 ; FMA4-NOINFS-NEXT: vfnmsubps %xmm1, %xmm1, %xmm0, %xmm0
1026 ; FMA4-NOINFS-NEXT: retq
1028 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
1029 ; AVX512-NOINFS: # %bb.0:
1030 ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
1031 ; AVX512-NOINFS-NEXT: retq
1032 %s = fsub <4 x float> <float -1.0, float -1.0, float undef, float -1.0>, %x
1033 %m = fmul <4 x float> %y, %s
; Multiplies (x - 1.0) by y, with the subtraction as the first fmul operand.
; Without -enable-no-infs-fp-math the expectations are a vaddps against a
; constant in memory followed by vmulps; with that option a vfmsub
; instruction is expected instead.
1037 define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
1038 ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
1039 ; FMA-INFS: # %bb.0:
1040 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1041 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1042 ; FMA-INFS-NEXT: retq
1044 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
1045 ; FMA4-INFS: # %bb.0:
1046 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1047 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1048 ; FMA4-INFS-NEXT: retq
1050 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
1051 ; AVX512-INFS: # %bb.0:
1052 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
1053 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1054 ; AVX512-INFS-NEXT: retq
1056 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
1057 ; FMA-NOINFS: # %bb.0:
1058 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1059 ; FMA-NOINFS-NEXT: retq
1061 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
1062 ; FMA4-NOINFS: # %bb.0:
1063 ; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
1064 ; FMA4-NOINFS-NEXT: retq
1066 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
1067 ; AVX512-NOINFS: # %bb.0:
1068 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1069 ; AVX512-NOINFS-NEXT: retq
1070 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
1071 %m = fmul <4 x float> %s, %y
; Multiplies y by (x - 1.0), with the subtraction as the second fmul operand.
; Without -enable-no-infs-fp-math the expectations are a vaddps against a
; constant in memory followed by vmulps; with that option a vfmsub
; instruction is expected instead.
1075 define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
1076 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
1077 ; FMA-INFS: # %bb.0:
1078 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1079 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1080 ; FMA-INFS-NEXT: retq
1082 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
1083 ; FMA4-INFS: # %bb.0:
1084 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1085 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1086 ; FMA4-INFS-NEXT: retq
1088 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
1089 ; AVX512-INFS: # %bb.0:
1090 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
1091 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1092 ; AVX512-INFS-NEXT: retq
1094 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
1095 ; FMA-NOINFS: # %bb.0:
1096 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1097 ; FMA-NOINFS-NEXT: retq
1099 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
1100 ; FMA4-NOINFS: # %bb.0:
1101 ; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
1102 ; FMA4-NOINFS-NEXT: retq
1104 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
1105 ; AVX512-NOINFS: # %bb.0:
1106 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1107 ; AVX512-NOINFS-NEXT: retq
1108 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
1109 %m = fmul <4 x float> %y, %s
; Multiplies y by (x - 1.0), with the last lane of the 1.0 constant left
; undef. Without -enable-no-infs-fp-math the expectations are a vaddps
; against a constant in memory followed by vmulps; with that option a vfmsub
; instruction is expected instead.
1113 define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float> %y) {
1114 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1115 ; FMA-INFS: # %bb.0:
1116 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1117 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1118 ; FMA-INFS-NEXT: retq
1120 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1121 ; FMA4-INFS: # %bb.0:
1122 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1123 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1124 ; FMA4-INFS-NEXT: retq
1126 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1127 ; AVX512-INFS: # %bb.0:
1128 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
1129 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1130 ; AVX512-INFS-NEXT: retq
1132 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1133 ; FMA-NOINFS: # %bb.0:
1134 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1135 ; FMA-NOINFS-NEXT: retq
1137 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1138 ; FMA4-NOINFS: # %bb.0:
1139 ; FMA4-NOINFS-NEXT: vfmsubps %xmm1, %xmm1, %xmm0, %xmm0
1140 ; FMA4-NOINFS-NEXT: retq
1142 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1143 ; AVX512-NOINFS: # %bb.0:
1144 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1145 ; AVX512-NOINFS-NEXT: retq
; The fourth element of the constant vector is undef rather than 1.0.
1146 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float undef>
1147 %m = fmul <4 x float> %y, %s
; Multiplies (x - (-1.0)) by y, with the subtraction as the first fmul
; operand. Without -enable-no-infs-fp-math the expectations are a vaddps
; against a constant in memory followed by vmulps; with that option a vfmadd
; instruction is expected instead.
1151 define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
1152 ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1153 ; FMA-INFS: # %bb.0:
1154 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1155 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1156 ; FMA-INFS-NEXT: retq
1158 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1159 ; FMA4-INFS: # %bb.0:
1160 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1161 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1162 ; FMA4-INFS-NEXT: retq
1164 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1165 ; AVX512-INFS: # %bb.0:
1166 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
1167 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1168 ; AVX512-INFS-NEXT: retq
1170 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1171 ; FMA-NOINFS: # %bb.0:
1172 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1173 ; FMA-NOINFS-NEXT: retq
1175 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1176 ; FMA4-NOINFS: # %bb.0:
1177 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
1178 ; FMA4-NOINFS-NEXT: retq
1180 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1181 ; AVX512-NOINFS: # %bb.0:
1182 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1183 ; AVX512-NOINFS-NEXT: retq
1184 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
1185 %m = fmul <4 x float> %s, %y
; Multiplies y by (x - (-1.0)), with the subtraction as the second fmul
; operand. Without -enable-no-infs-fp-math the expectations are a vaddps
; against a constant in memory followed by vmulps; with that option a vfmadd
; instruction is expected instead.
1189 define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
1190 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1191 ; FMA-INFS: # %bb.0:
1192 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1193 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1194 ; FMA-INFS-NEXT: retq
1196 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1197 ; FMA4-INFS: # %bb.0:
1198 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1199 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1200 ; FMA4-INFS-NEXT: retq
1202 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1203 ; AVX512-INFS: # %bb.0:
1204 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
1205 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1206 ; AVX512-INFS-NEXT: retq
1208 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1209 ; FMA-NOINFS: # %bb.0:
1210 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1211 ; FMA-NOINFS-NEXT: retq
1213 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1214 ; FMA4-NOINFS: # %bb.0:
1215 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
1216 ; FMA4-NOINFS-NEXT: retq
1218 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1219 ; AVX512-NOINFS: # %bb.0:
1220 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1221 ; AVX512-NOINFS-NEXT: retq
1222 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
1223 %m = fmul <4 x float> %y, %s
; Multiplies y by (x - (-1.0)), with the first lane of the -1.0 constant left
; undef. Without -enable-no-infs-fp-math the expectations are a vaddps
; against a constant in memory followed by vmulps; with that option a vfmadd
; instruction is expected instead.
1227 define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x float> %y) {
1228 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1229 ; FMA-INFS: # %bb.0:
1230 ; FMA-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1231 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1232 ; FMA-INFS-NEXT: retq
1234 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1235 ; FMA4-INFS: # %bb.0:
1236 ; FMA4-INFS-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
1237 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1238 ; FMA4-INFS-NEXT: retq
1240 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1241 ; AVX512-INFS: # %bb.0:
1242 ; AVX512-INFS-NEXT: vaddps {{.*}}(%rip){1to4}, %xmm0, %xmm0
1243 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1244 ; AVX512-INFS-NEXT: retq
1246 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1247 ; FMA-NOINFS: # %bb.0:
1248 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1249 ; FMA-NOINFS-NEXT: retq
1251 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1252 ; FMA4-NOINFS: # %bb.0:
1253 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm1, %xmm0, %xmm0
1254 ; FMA4-NOINFS-NEXT: retq
1256 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1257 ; AVX512-NOINFS: # %bb.0:
1258 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1259 ; AVX512-NOINFS-NEXT: retq
; The first element of the constant vector is undef rather than -1.0.
1260 %s = fsub <4 x float> %x, <float undef, float -1.0, float -1.0, float -1.0>
1261 %m = fmul <4 x float> %y, %s
1266 ; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
; Scalar float interpolation: x * t + y * (1.0 - t).
; Without -enable-no-infs-fp-math the expectations are a constant load, a
; subtract, a multiply and one fused multiply-add; with that option the
; expectations are a vfnmadd followed by a vfmadd.
1269 define float @test_f32_interp(float %x, float %y, float %t) {
1270 ; FMA-INFS-LABEL: test_f32_interp:
1271 ; FMA-INFS: # %bb.0:
1272 ; FMA-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
1273 ; FMA-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3
1274 ; FMA-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1
1275 ; FMA-INFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1276 ; FMA-INFS-NEXT: retq
1278 ; FMA4-INFS-LABEL: test_f32_interp:
1279 ; FMA4-INFS: # %bb.0:
1280 ; FMA4-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
1281 ; FMA4-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3
1282 ; FMA4-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1
1283 ; FMA4-INFS-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
1284 ; FMA4-INFS-NEXT: retq
1286 ; AVX512-INFS-LABEL: test_f32_interp:
1287 ; AVX512-INFS: # %bb.0:
1288 ; AVX512-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
1289 ; AVX512-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3
1290 ; AVX512-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1
1291 ; AVX512-INFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1292 ; AVX512-INFS-NEXT: retq
1294 ; FMA-NOINFS-LABEL: test_f32_interp:
1295 ; FMA-NOINFS: # %bb.0:
1296 ; FMA-NOINFS-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm2 * xmm1) + xmm1
1297 ; FMA-NOINFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1298 ; FMA-NOINFS-NEXT: retq
1300 ; FMA4-NOINFS-LABEL: test_f32_interp:
1301 ; FMA4-NOINFS: # %bb.0:
1302 ; FMA4-NOINFS-NEXT: vfnmaddss %xmm1, %xmm1, %xmm2, %xmm1
1303 ; FMA4-NOINFS-NEXT: vfmaddss %xmm1, %xmm2, %xmm0, %xmm0
1304 ; FMA4-NOINFS-NEXT: retq
1306 ; AVX512-NOINFS-LABEL: test_f32_interp:
1307 ; AVX512-NOINFS: # %bb.0:
1308 ; AVX512-NOINFS-NEXT: vfnmadd213ss {{.*#+}} xmm1 = -(xmm2 * xmm1) + xmm1
1309 ; AVX512-NOINFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1310 ; AVX512-NOINFS-NEXT: retq
1311 %t1 = fsub float 1.0, %t
1312 %tx = fmul float %x, %t
1313 %ty = fmul float %y, %t1
1314 %r = fadd float %tx, %ty
; <4 x float> interpolation: x * t + y * (1.0 - t).
; Without -enable-no-infs-fp-math the expectations are a constant load, a
; subtract, a multiply and one fused multiply-add; with that option the
; expectations are a vfnmadd followed by a vfmadd.
1318 define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
1319 ; FMA-INFS-LABEL: test_v4f32_interp:
1320 ; FMA-INFS: # %bb.0:
1321 ; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1322 ; FMA-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
1323 ; FMA-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
1324 ; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1325 ; FMA-INFS-NEXT: retq
1327 ; FMA4-INFS-LABEL: test_v4f32_interp:
1328 ; FMA4-INFS: # %bb.0:
1329 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1330 ; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
1331 ; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
1332 ; FMA4-INFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
1333 ; FMA4-INFS-NEXT: retq
1335 ; AVX512-INFS-LABEL: test_v4f32_interp:
1336 ; AVX512-INFS: # %bb.0:
1337 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1338 ; AVX512-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
1339 ; AVX512-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
1340 ; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1341 ; AVX512-INFS-NEXT: retq
1343 ; FMA-NOINFS-LABEL: test_v4f32_interp:
1344 ; FMA-NOINFS: # %bb.0:
1345 ; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm2 * xmm1) + xmm1
1346 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1347 ; FMA-NOINFS-NEXT: retq
1349 ; FMA4-NOINFS-LABEL: test_v4f32_interp:
1350 ; FMA4-NOINFS: # %bb.0:
1351 ; FMA4-NOINFS-NEXT: vfnmaddps %xmm1, %xmm1, %xmm2, %xmm1
1352 ; FMA4-NOINFS-NEXT: vfmaddps %xmm1, %xmm2, %xmm0, %xmm0
1353 ; FMA4-NOINFS-NEXT: retq
1355 ; AVX512-NOINFS-LABEL: test_v4f32_interp:
1356 ; AVX512-NOINFS: # %bb.0:
1357 ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm1 = -(xmm2 * xmm1) + xmm1
1358 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1359 ; AVX512-NOINFS-NEXT: retq
1360 %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
1361 %tx = fmul <4 x float> %x, %t
1362 %ty = fmul <4 x float> %y, %t1
1363 %r = fadd <4 x float> %tx, %ty
; <8 x float> interpolation: x * t + y * (1.0 - t), checked on ymm registers.
; Without -enable-no-infs-fp-math the expectations are a constant load, a
; subtract, a multiply and one fused multiply-add; with that option the
; expectations are a vfnmadd followed by a vfmadd.
1367 define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
1368 ; FMA-INFS-LABEL: test_v8f32_interp:
1369 ; FMA-INFS: # %bb.0:
1370 ; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1371 ; FMA-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
1372 ; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
1373 ; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1374 ; FMA-INFS-NEXT: retq
1376 ; FMA4-INFS-LABEL: test_v8f32_interp:
1377 ; FMA4-INFS: # %bb.0:
1378 ; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1379 ; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
1380 ; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
1381 ; FMA4-INFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
1382 ; FMA4-INFS-NEXT: retq
1384 ; AVX512-INFS-LABEL: test_v8f32_interp:
1385 ; AVX512-INFS: # %bb.0:
1386 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1387 ; AVX512-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
1388 ; AVX512-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
1389 ; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1390 ; AVX512-INFS-NEXT: retq
1392 ; FMA-NOINFS-LABEL: test_v8f32_interp:
1393 ; FMA-NOINFS: # %bb.0:
1394 ; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm1
1395 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1396 ; FMA-NOINFS-NEXT: retq
1398 ; FMA4-NOINFS-LABEL: test_v8f32_interp:
1399 ; FMA4-NOINFS: # %bb.0:
1400 ; FMA4-NOINFS-NEXT: vfnmaddps %ymm1, %ymm1, %ymm2, %ymm1
1401 ; FMA4-NOINFS-NEXT: vfmaddps %ymm1, %ymm2, %ymm0, %ymm0
1402 ; FMA4-NOINFS-NEXT: retq
1404 ; AVX512-NOINFS-LABEL: test_v8f32_interp:
1405 ; AVX512-NOINFS: # %bb.0:
1406 ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm1
1407 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1408 ; AVX512-NOINFS-NEXT: retq
1409 %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
1410 %tx = fmul <8 x float> %x, %t
1411 %ty = fmul <8 x float> %y, %t1
1412 %r = fadd <8 x float> %tx, %ty
; Scalar double interpolation: x * t + y * (1.0 - t).
; Without -enable-no-infs-fp-math the expectations are a constant load, a
; subtract, a multiply and one fused multiply-add; with that option the
; expectations are a vfnmadd followed by a vfmadd.
1416 define double @test_f64_interp(double %x, double %y, double %t) {
1417 ; FMA-INFS-LABEL: test_f64_interp:
1418 ; FMA-INFS: # %bb.0:
1419 ; FMA-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
1420 ; FMA-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3
1421 ; FMA-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1
1422 ; FMA-INFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1423 ; FMA-INFS-NEXT: retq
1425 ; FMA4-INFS-LABEL: test_f64_interp:
1426 ; FMA4-INFS: # %bb.0:
1427 ; FMA4-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
1428 ; FMA4-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3
1429 ; FMA4-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1
1430 ; FMA4-INFS-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
1431 ; FMA4-INFS-NEXT: retq
1433 ; AVX512-INFS-LABEL: test_f64_interp:
1434 ; AVX512-INFS: # %bb.0:
1435 ; AVX512-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
1436 ; AVX512-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3
1437 ; AVX512-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1
1438 ; AVX512-INFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1439 ; AVX512-INFS-NEXT: retq
1441 ; FMA-NOINFS-LABEL: test_f64_interp:
1442 ; FMA-NOINFS: # %bb.0:
1443 ; FMA-NOINFS-NEXT: vfnmadd213sd {{.*#+}} xmm1 = -(xmm2 * xmm1) + xmm1
1444 ; FMA-NOINFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1445 ; FMA-NOINFS-NEXT: retq
1447 ; FMA4-NOINFS-LABEL: test_f64_interp:
1448 ; FMA4-NOINFS: # %bb.0:
1449 ; FMA4-NOINFS-NEXT: vfnmaddsd %xmm1, %xmm1, %xmm2, %xmm1
1450 ; FMA4-NOINFS-NEXT: vfmaddsd %xmm1, %xmm2, %xmm0, %xmm0
1451 ; FMA4-NOINFS-NEXT: retq
1453 ; AVX512-NOINFS-LABEL: test_f64_interp:
1454 ; AVX512-NOINFS: # %bb.0:
1455 ; AVX512-NOINFS-NEXT: vfnmadd213sd {{.*#+}} xmm1 = -(xmm2 * xmm1) + xmm1
1456 ; AVX512-NOINFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1457 ; AVX512-NOINFS-NEXT: retq
1458 %t1 = fsub double 1.0, %t
1459 %tx = fmul double %x, %t
1460 %ty = fmul double %y, %t1
1461 %r = fadd double %tx, %ty
; <2 x double> interpolation: x * t + y * (1.0 - t).
; Without -enable-no-infs-fp-math the expectations are a constant load, a
; subtract, a multiply and one fused multiply-add; with that option the
; expectations are a vfnmadd followed by a vfmadd.
1465 define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
1466 ; FMA-INFS-LABEL: test_v2f64_interp:
1467 ; FMA-INFS: # %bb.0:
1468 ; FMA-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
1469 ; FMA-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
1470 ; FMA-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
1471 ; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1472 ; FMA-INFS-NEXT: retq
1474 ; FMA4-INFS-LABEL: test_v2f64_interp:
1475 ; FMA4-INFS: # %bb.0:
1476 ; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
1477 ; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
1478 ; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
1479 ; FMA4-INFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
1480 ; FMA4-INFS-NEXT: retq
1482 ; AVX512-INFS-LABEL: test_v2f64_interp:
1483 ; AVX512-INFS: # %bb.0:
1484 ; AVX512-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
1485 ; AVX512-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
1486 ; AVX512-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
1487 ; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1488 ; AVX512-INFS-NEXT: retq
1490 ; FMA-NOINFS-LABEL: test_v2f64_interp:
1491 ; FMA-NOINFS: # %bb.0:
1492 ; FMA-NOINFS-NEXT: vfnmadd213pd {{.*#+}} xmm1 = -(xmm2 * xmm1) + xmm1
1493 ; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1494 ; FMA-NOINFS-NEXT: retq
1496 ; FMA4-NOINFS-LABEL: test_v2f64_interp:
1497 ; FMA4-NOINFS: # %bb.0:
1498 ; FMA4-NOINFS-NEXT: vfnmaddpd %xmm1, %xmm1, %xmm2, %xmm1
1499 ; FMA4-NOINFS-NEXT: vfmaddpd %xmm1, %xmm2, %xmm0, %xmm0
1500 ; FMA4-NOINFS-NEXT: retq
1502 ; AVX512-NOINFS-LABEL: test_v2f64_interp:
1503 ; AVX512-NOINFS: # %bb.0:
1504 ; AVX512-NOINFS-NEXT: vfnmadd213pd {{.*#+}} xmm1 = -(xmm2 * xmm1) + xmm1
1505 ; AVX512-NOINFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1506 ; AVX512-NOINFS-NEXT: retq
1507 %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
1508 %tx = fmul <2 x double> %x, %t
1509 %ty = fmul <2 x double> %y, %t1
1510 %r = fadd <2 x double> %tx, %ty
; <4 x double> interpolation: x * t + y * (1.0 - t), checked on ymm registers.
; Without -enable-no-infs-fp-math the expectations are a constant load, a
; subtract, a multiply and one fused multiply-add; with that option the
; expectations are a vfnmadd followed by a vfmadd.
1514 define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
1515 ; FMA-INFS-LABEL: test_v4f64_interp:
1516 ; FMA-INFS: # %bb.0:
1517 ; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1518 ; FMA-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
1519 ; FMA-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
1520 ; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1521 ; FMA-INFS-NEXT: retq
1523 ; FMA4-INFS-LABEL: test_v4f64_interp:
1524 ; FMA4-INFS: # %bb.0:
1525 ; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1526 ; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
1527 ; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
1528 ; FMA4-INFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
1529 ; FMA4-INFS-NEXT: retq
1531 ; AVX512-INFS-LABEL: test_v4f64_interp:
1532 ; AVX512-INFS: # %bb.0:
1533 ; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1534 ; AVX512-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
1535 ; AVX512-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
1536 ; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1537 ; AVX512-INFS-NEXT: retq
1539 ; FMA-NOINFS-LABEL: test_v4f64_interp:
1540 ; FMA-NOINFS: # %bb.0:
1541 ; FMA-NOINFS-NEXT: vfnmadd213pd {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm1
1542 ; FMA-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1543 ; FMA-NOINFS-NEXT: retq
1545 ; FMA4-NOINFS-LABEL: test_v4f64_interp:
1546 ; FMA4-NOINFS: # %bb.0:
1547 ; FMA4-NOINFS-NEXT: vfnmaddpd %ymm1, %ymm1, %ymm2, %ymm1
1548 ; FMA4-NOINFS-NEXT: vfmaddpd %ymm1, %ymm2, %ymm0, %ymm0
1549 ; FMA4-NOINFS-NEXT: retq
1551 ; AVX512-NOINFS-LABEL: test_v4f64_interp:
1552 ; AVX512-NOINFS: # %bb.0:
1553 ; AVX512-NOINFS-NEXT: vfnmadd213pd {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm1
1554 ; AVX512-NOINFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1555 ; AVX512-NOINFS-NEXT: retq
1556 %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
1557 %tx = fmul <4 x double> %x, %t
1558 %ty = fmul <4 x double> %y, %t1
1559 %r = fadd <4 x double> %tx, %ty
1564 ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
; Negates (a0 * a1 + a2); the negation is written as a subtraction from a
; -0.0 splat. The function carries attribute group #0. The checks visible
; here expect a vfnmsub instruction in every configuration.
1567 define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
1568 ; FMA-LABEL: test_v4f32_fneg_fmadd:
1570 ; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1573 ; FMA4-LABEL: test_v4f32_fneg_fmadd:
1575 ; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
1578 ; AVX512-LABEL: test_v4f32_fneg_fmadd:
1580 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1582 %mul = fmul <4 x float> %a0, %a1
1583 %add = fadd <4 x float> %mul, %a2
1584 %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
1585 ret <4 x float> %neg
; Negates (a0 * a1 - a2); the negation is written as a subtraction from a
; -0.0 splat. The function carries attribute group #0. The checks visible
; here expect a vfnmadd instruction in every configuration.
1588 define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
1589 ; FMA-LABEL: test_v4f64_fneg_fmsub:
1591 ; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
1594 ; FMA4-LABEL: test_v4f64_fneg_fmsub:
1596 ; FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
1599 ; AVX512-LABEL: test_v4f64_fneg_fmsub:
1601 ; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
1603 %mul = fmul <4 x double> %a0, %a1
1604 %sub = fsub <4 x double> %mul, %a2
1605 %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
1606 ret <4 x double> %neg
; Negates (-(a0 * a1) + a2); both negations are written as subtractions from
; a -0.0 splat. The function carries attribute group #0. The checks visible
; here expect a vfmsub instruction in every configuration.
1609 define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
1610 ; FMA-LABEL: test_v4f32_fneg_fnmadd:
1612 ; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
1615 ; FMA4-LABEL: test_v4f32_fneg_fnmadd:
1617 ; FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
1620 ; AVX512-LABEL: test_v4f32_fneg_fnmadd:
1622 ; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
1624 %mul = fmul <4 x float> %a0, %a1
1625 %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
1626 %add = fadd <4 x float> %neg0, %a2
1627 %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
1628 ret <4 x float> %neg1
; Negates (-(a0 * a1) - a2); both negations are written as subtractions from
; a -0.0 splat. The function carries attribute group #0. The checks visible
; here expect a vfmadd instruction in every configuration.
1631 define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
1632 ; FMA-LABEL: test_v4f64_fneg_fnmsub:
1634 ; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
1637 ; FMA4-LABEL: test_v4f64_fneg_fnmsub:
1639 ; FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
1642 ; AVX512-LABEL: test_v4f64_fneg_fnmsub:
1644 ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
1646 %mul = fmul <4 x double> %a0, %a1
1647 %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
1648 %sub = fsub <4 x double> %neg0, %a2
1649 %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
1650 ret <4 x double> %neg1
1654 ; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
; Adds two products of the same x with different constant vectors,
; <1,2,3,4> and <4,3,2,1>. The function carries attribute group #0. The
; checks visible here expect a vmulps by a constant in memory. The two
; constants sum to 5.0 in every lane, which is consistent with the {1to4}
; broadcast operand in the AVX512 expectation.
1657 define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
1658 ; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1660 ; FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
1663 ; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1665 ; FMA4-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
1668 ; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1670 ; AVX512-NEXT: vmulps {{.*}}(%rip){1to4}, %xmm0, %xmm0
1672 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
1673 %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
1674 %a = fadd <4 x float> %m0, %m1
1679 ; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
; Multiplies x by <1,2,3,4>, multiplies that product by <4,3,2,1>, then adds
; y. The function carries attribute group #0. The checks visible here expect
; a fused multiply-add whose multiplier is a constant in memory.
1682 define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 {
1683 ; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1685 ; FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
1688 ; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1690 ; FMA4-NEXT: vfmaddps %xmm1, {{.*}}(%rip), %xmm0, %xmm0
1693 ; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1695 ; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
1697 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
1698 %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
1699 %a = fadd <4 x float> %m1, %y
1703 ; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
; Negates a scalar double product whose fmul carries the nsz flag; the
; negation is written as a subtraction from -0.0. The function carries
; attribute group #0. The checks visible here expect a register zeroed with
; vxorpd, then a vfnmsub that uses it as the addend.
1705 define double @test_f64_fneg_fmul(double %x, double %y) #0 {
1706 ; FMA-LABEL: test_f64_fneg_fmul:
1708 ; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1709 ; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1712 ; FMA4-LABEL: test_f64_fneg_fmul:
1714 ; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1715 ; FMA4-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
1718 ; AVX512-LABEL: test_f64_fneg_fmul:
1720 ; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1721 ; AVX512-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1723 %m = fmul nsz double %x, %y
1724 %n = fsub double -0.0, %m
; Negates a <4 x float> product whose fmul carries the nsz flag; the negation
; is written as a subtraction from a -0.0 splat. The function carries
; attribute group #0. The checks visible here expect a register zeroed with
; vxorps, then a vfnmsub that uses it as the addend.
1728 define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
1729 ; FMA-LABEL: test_v4f32_fneg_fmul:
1731 ; FMA-NEXT: vxorps %xmm2, %xmm2, %xmm2
1732 ; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1735 ; FMA4-LABEL: test_v4f32_fneg_fmul:
1737 ; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2
1738 ; FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
1741 ; AVX512-LABEL: test_v4f32_fneg_fmul:
1743 ; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
1744 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1746 %m = fmul nsz <4 x float> %x, %y
1747 %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m
; Negates a <4 x double> product whose fmul carries the nsz flag; the
; negation is written as a subtraction from a -0.0 splat. The function
; carries attribute group #0. The checks visible here expect a vxorpd that
; zeroes xmm2, then a vfnmsub that uses ymm2 as the addend.
1751 define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
1752 ; FMA-LABEL: test_v4f64_fneg_fmul:
1754 ; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1755 ; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
1758 ; FMA4-LABEL: test_v4f64_fneg_fmul:
1760 ; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1761 ; FMA4-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
1764 ; AVX512-LABEL: test_v4f64_fneg_fmul:
1766 ; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1767 ; AVX512-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
1769 %m = fmul nsz <4 x double> %x, %y
1770 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
; Same negated <4 x double> product as test_v4f64_fneg_fmul, except that the
; fmul has no nsz flag. The function carries attribute group #0. The checks
; visible here expect no fused instruction: a plain vmulpd followed by a
; vxorpd against a constant in memory.
1774 define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 {
1775 ; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz:
1777 ; FMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
1778 ; FMA-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
1781 ; FMA4-LABEL: test_v4f64_fneg_fmul_no_nsz:
1783 ; FMA4-NEXT: vmulpd %ymm1, %ymm0, %ymm0
1784 ; FMA4-NEXT: vxorpd {{.*}}(%rip), %ymm0, %ymm0
1787 ; AVX512-LABEL: test_v4f64_fneg_fmul_no_nsz:
1789 ; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0
1790 ; AVX512-NEXT: vxorpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
; No fast-math flags on this fmul, unlike the nsz variants of this test.
1792 %m = fmul <4 x double> %x, %y
1793 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
; Attribute group referenced by the functions declared with #0; it sets the
; "unsafe-fp-math" function attribute to "true".
1797 attributes #0 = { "unsafe-fp-math"="true" }