1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefixes=FMA,FMA-INFS
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefixes=FMA4,FMA4-INFS
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefixes=FMA4,FMA4-INFS
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefixes=AVX512,AVX512-INFS
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA,FMA-NOINFS
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS
8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS
9 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=AVX512,AVX512-NOINFS
12 ; Pattern: (fadd (fmul x, y), z) -> (fmadd x, y, z)
; Scalar f32 form of (a0 * a1) + a2. Every run is expected to select a single
; fused multiply-add (vfmadd213ss, or vfmaddss on the FMA4 runs).
15 define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
16 ; FMA-LABEL: test_f32_fmadd:
18 ; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
21 ; FMA4-LABEL: test_f32_fmadd:
23 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
26 ; AVX512-LABEL: test_f32_fmadd:
28 ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
30 %x = fmul float %a0, %a1
31 %res = fadd float %x, %a2
; 4 x f32 form of (a0 * a1) + a2. Every run is expected to select one packed
; fused multiply-add on xmm registers (vfmadd213ps, or vfmaddps on the FMA4 runs).
35 define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
36 ; FMA-LABEL: test_4f32_fmadd:
38 ; FMA-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
41 ; FMA4-LABEL: test_4f32_fmadd:
43 ; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
46 ; AVX512-LABEL: test_4f32_fmadd:
48 ; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
50 %x = fmul <4 x float> %a0, %a1
51 %res = fadd <4 x float> %x, %a2
; 8 x f32 form of (a0 * a1) + a2. Every run is expected to select one packed
; fused multiply-add on ymm registers (vfmadd213ps, or vfmaddps on the FMA4 runs).
55 define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
56 ; FMA-LABEL: test_8f32_fmadd:
58 ; FMA-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
61 ; FMA4-LABEL: test_8f32_fmadd:
63 ; FMA4-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
66 ; AVX512-LABEL: test_8f32_fmadd:
68 ; AVX512-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
70 %x = fmul <8 x float> %a0, %a1
71 %res = fadd <8 x float> %x, %a2
; Scalar f64 form of (a0 * a1) + a2. Every run is expected to select a single
; fused multiply-add (vfmadd213sd, or vfmaddsd on the FMA4 runs).
75 define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
76 ; FMA-LABEL: test_f64_fmadd:
78 ; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
81 ; FMA4-LABEL: test_f64_fmadd:
83 ; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
86 ; AVX512-LABEL: test_f64_fmadd:
88 ; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
90 %x = fmul double %a0, %a1
91 %res = fadd double %x, %a2
; 2 x f64 form of (a0 * a1) + a2. Every run is expected to select one packed
; fused multiply-add on xmm registers (vfmadd213pd, or vfmaddpd on the FMA4 runs).
95 define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
96 ; FMA-LABEL: test_2f64_fmadd:
98 ; FMA-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
101 ; FMA4-LABEL: test_2f64_fmadd:
103 ; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
106 ; AVX512-LABEL: test_2f64_fmadd:
108 ; AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
110 %x = fmul <2 x double> %a0, %a1
111 %res = fadd <2 x double> %x, %a2
112 ret <2 x double> %res
; 4 x f64 form of (a0 * a1) + a2. Every run is expected to select one packed
; fused multiply-add on ymm registers (vfmadd213pd, or vfmaddpd on the FMA4 runs).
115 define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
116 ; FMA-LABEL: test_4f64_fmadd:
118 ; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
121 ; FMA4-LABEL: test_4f64_fmadd:
123 ; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
126 ; AVX512-LABEL: test_4f64_fmadd:
128 ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
130 %x = fmul <4 x double> %a0, %a1
131 %res = fadd <4 x double> %x, %a2
132 ret <4 x double> %res
136 ; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
; Scalar f32 form of (a0 * a1) - a2. Every run is expected to select a single
; fused multiply-subtract (vfmsub213ss, or vfmsubss on the FMA4 runs).
139 define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
140 ; FMA-LABEL: test_f32_fmsub:
142 ; FMA-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
145 ; FMA4-LABEL: test_f32_fmsub:
147 ; FMA4-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
150 ; AVX512-LABEL: test_f32_fmsub:
152 ; AVX512-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
154 %x = fmul float %a0, %a1
155 %res = fsub float %x, %a2
; 4 x f32 form of (a0 * a1) - a2. Every run is expected to select one packed
; fused multiply-subtract on xmm registers (vfmsub213ps, or vfmsubps on the FMA4 runs).
159 define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
160 ; FMA-LABEL: test_4f32_fmsub:
162 ; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
165 ; FMA4-LABEL: test_4f32_fmsub:
167 ; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
170 ; AVX512-LABEL: test_4f32_fmsub:
172 ; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
174 %x = fmul <4 x float> %a0, %a1
175 %res = fsub <4 x float> %x, %a2
; 8 x f32 form of (a0 * a1) - a2. Every run is expected to select one packed
; fused multiply-subtract on ymm registers (vfmsub213ps, or vfmsubps on the FMA4 runs).
179 define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
180 ; FMA-LABEL: test_8f32_fmsub:
182 ; FMA-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
185 ; FMA4-LABEL: test_8f32_fmsub:
187 ; FMA4-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
190 ; AVX512-LABEL: test_8f32_fmsub:
192 ; AVX512-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
194 %x = fmul <8 x float> %a0, %a1
195 %res = fsub <8 x float> %x, %a2
; Scalar f64 form of (a0 * a1) - a2. Every run is expected to select a single
; fused multiply-subtract (vfmsub213sd, or vfmsubsd on the FMA4 runs).
199 define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
200 ; FMA-LABEL: test_f64_fmsub:
202 ; FMA-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
205 ; FMA4-LABEL: test_f64_fmsub:
207 ; FMA4-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
210 ; AVX512-LABEL: test_f64_fmsub:
212 ; AVX512-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
214 %x = fmul double %a0, %a1
215 %res = fsub double %x, %a2
; 2 x f64 form of (a0 * a1) - a2. Every run is expected to select one packed
; fused multiply-subtract on xmm registers (vfmsub213pd, or vfmsubpd on the FMA4 runs).
219 define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
220 ; FMA-LABEL: test_2f64_fmsub:
222 ; FMA-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
225 ; FMA4-LABEL: test_2f64_fmsub:
227 ; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
230 ; AVX512-LABEL: test_2f64_fmsub:
232 ; AVX512-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
234 %x = fmul <2 x double> %a0, %a1
235 %res = fsub <2 x double> %x, %a2
236 ret <2 x double> %res
; 4 x f64 form of (a0 * a1) - a2. Every run is expected to select one packed
; fused multiply-subtract on ymm registers (vfmsub213pd, or vfmsubpd on the FMA4 runs).
239 define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
240 ; FMA-LABEL: test_4f64_fmsub:
242 ; FMA-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
245 ; FMA4-LABEL: test_4f64_fmsub:
247 ; FMA4-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
250 ; AVX512-LABEL: test_4f64_fmsub:
252 ; AVX512-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
254 %x = fmul <4 x double> %a0, %a1
255 %res = fsub <4 x double> %x, %a2
256 ret <4 x double> %res
260 ; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
; Scalar f32 form of a2 - (a0 * a1). Every run is expected to select a single
; negated multiply-add (vfnmadd213ss, or vfnmaddss on the FMA4 runs).
263 define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
264 ; FMA-LABEL: test_f32_fnmadd:
266 ; FMA-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
269 ; FMA4-LABEL: test_f32_fnmadd:
271 ; FMA4-NEXT: vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
274 ; AVX512-LABEL: test_f32_fnmadd:
276 ; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
278 %x = fmul float %a0, %a1
279 %res = fsub float %a2, %x
; 4 x f32 form of a2 - (a0 * a1). Every run is expected to select one packed
; negated multiply-add on xmm registers (vfnmadd213ps, or vfnmaddps on the FMA4 runs).
283 define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
284 ; FMA-LABEL: test_4f32_fnmadd:
286 ; FMA-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
289 ; FMA4-LABEL: test_4f32_fnmadd:
291 ; FMA4-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
294 ; AVX512-LABEL: test_4f32_fnmadd:
296 ; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
298 %x = fmul <4 x float> %a0, %a1
299 %res = fsub <4 x float> %a2, %x
; 8 x f32 form of a2 - (a0 * a1). Every run is expected to select one packed
; negated multiply-add on ymm registers (vfnmadd213ps, or vfnmaddps on the FMA4 runs).
303 define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
304 ; FMA-LABEL: test_8f32_fnmadd:
306 ; FMA-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
309 ; FMA4-LABEL: test_8f32_fnmadd:
311 ; FMA4-NEXT: vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
314 ; AVX512-LABEL: test_8f32_fnmadd:
316 ; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
318 %x = fmul <8 x float> %a0, %a1
319 %res = fsub <8 x float> %a2, %x
; Scalar f64 form of a2 - (a0 * a1). Every run is expected to select a single
; negated multiply-add (vfnmadd213sd, or vfnmaddsd on the FMA4 runs).
323 define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
324 ; FMA-LABEL: test_f64_fnmadd:
326 ; FMA-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
329 ; FMA4-LABEL: test_f64_fnmadd:
331 ; FMA4-NEXT: vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
334 ; AVX512-LABEL: test_f64_fnmadd:
336 ; AVX512-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
338 %x = fmul double %a0, %a1
339 %res = fsub double %a2, %x
; 2 x f64 form of a2 - (a0 * a1). Every run is expected to select one packed
; negated multiply-add on xmm registers (vfnmadd213pd, or vfnmaddpd on the FMA4 runs).
343 define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
344 ; FMA-LABEL: test_2f64_fnmadd:
346 ; FMA-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
349 ; FMA4-LABEL: test_2f64_fnmadd:
351 ; FMA4-NEXT: vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
354 ; AVX512-LABEL: test_2f64_fnmadd:
356 ; AVX512-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
358 %x = fmul <2 x double> %a0, %a1
359 %res = fsub <2 x double> %a2, %x
360 ret <2 x double> %res
; 4 x f64 form of a2 - (a0 * a1). Every run is expected to select one packed
; negated multiply-add on ymm registers (vfnmadd213pd, or vfnmaddpd on the FMA4 runs).
363 define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
364 ; FMA-LABEL: test_4f64_fnmadd:
366 ; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
369 ; FMA4-LABEL: test_4f64_fnmadd:
371 ; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
374 ; AVX512-LABEL: test_4f64_fnmadd:
376 ; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
378 %x = fmul <4 x double> %a0, %a1
379 %res = fsub <4 x double> %a2, %x
380 ret <4 x double> %res
384 ; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
; Scalar f32 form of (-(a0 * a1)) - a2, with the negation written as an fsub
; from -0.0. Every run is expected to select a single negated multiply-subtract
; (vfnmsub213ss, or vfnmsubss on the FMA4 runs).
387 define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
388 ; FMA-LABEL: test_f32_fnmsub:
390 ; FMA-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
393 ; FMA4-LABEL: test_f32_fnmsub:
395 ; FMA4-NEXT: vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
398 ; AVX512-LABEL: test_f32_fnmsub:
400 ; AVX512-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
402 %x = fmul float %a0, %a1
403 %y = fsub float -0.000000e+00, %x
404 %res = fsub float %y, %a2
; 4 x f32 form of (-(a0 * a1)) - a2, with the negation written as an fsub from
; a -0.0 splat. Every run is expected to select one packed negated
; multiply-subtract on xmm registers (vfnmsub213ps, or vfnmsubps on the FMA4 runs).
408 define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
409 ; FMA-LABEL: test_4f32_fnmsub:
411 ; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
414 ; FMA4-LABEL: test_4f32_fnmsub:
416 ; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
419 ; AVX512-LABEL: test_4f32_fnmsub:
421 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
423 %x = fmul <4 x float> %a0, %a1
424 %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
425 %res = fsub <4 x float> %y, %a2
; 8 x f32 form of (-(a0 * a1)) - a2, with the negation written as an fsub from
; a -0.0 splat. Every run is expected to select one packed negated
; multiply-subtract on ymm registers (vfnmsub213ps, or vfnmsubps on the FMA4 runs).
429 define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
430 ; FMA-LABEL: test_8f32_fnmsub:
432 ; FMA-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
435 ; FMA4-LABEL: test_8f32_fnmsub:
437 ; FMA4-NEXT: vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
440 ; AVX512-LABEL: test_8f32_fnmsub:
442 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
444 %x = fmul <8 x float> %a0, %a1
445 %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
446 %res = fsub <8 x float> %y, %a2
; Scalar f64 form of (-(a0 * a1)) - a2, with the negation written as an fsub
; from -0.0. Every run is expected to select a single negated multiply-subtract
; (vfnmsub213sd, or vfnmsubsd on the FMA4 runs).
450 define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
451 ; FMA-LABEL: test_f64_fnmsub:
453 ; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
456 ; FMA4-LABEL: test_f64_fnmsub:
458 ; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
461 ; AVX512-LABEL: test_f64_fnmsub:
463 ; AVX512-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
465 %x = fmul double %a0, %a1
466 %y = fsub double -0.000000e+00, %x
467 %res = fsub double %y, %a2
; 2 x f64 form of (-(a0 * a1)) - a2, with the negation written as an fsub from
; a -0.0 splat. Every run is expected to select one packed negated
; multiply-subtract on xmm registers (vfnmsub213pd, or vfnmsubpd on the FMA4 runs).
471 define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
472 ; FMA-LABEL: test_2f64_fnmsub:
474 ; FMA-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
477 ; FMA4-LABEL: test_2f64_fnmsub:
479 ; FMA4-NEXT: vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
482 ; AVX512-LABEL: test_2f64_fnmsub:
484 ; AVX512-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
486 %x = fmul <2 x double> %a0, %a1
487 %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x
488 %res = fsub <2 x double> %y, %a2
489 ret <2 x double> %res
; 4 x f64 form of (-(a0 * a1)) - a2, with the negation written as an fsub from
; a -0.0 splat. Every run is expected to select one packed negated
; multiply-subtract on ymm registers (vfnmsub213pd, or vfnmsubpd on the FMA4 runs).
492 define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
493 ; FMA-LABEL: test_4f64_fnmsub:
495 ; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
498 ; FMA4-LABEL: test_4f64_fnmsub:
500 ; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
503 ; AVX512-LABEL: test_4f64_fnmsub:
505 ; AVX512-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
507 %x = fmul <4 x double> %a0, %a1
508 %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
509 %res = fsub <4 x double> %y, %a2
510 ret <4 x double> %res
514 ; Load Folding Patterns
; (load(a0) * a1) + a2 on 4 x f32, where one multiplicand is loaded from the
; pointer argument. Every run is expected to fold the load into the fused
; multiply-add as its memory operand ("mem" in the decoded operand comment).
517 define <4 x float> @test_4f32_fmadd_load(ptr %a0, <4 x float> %a1, <4 x float> %a2) {
518 ; FMA-LABEL: test_4f32_fmadd_load:
520 ; FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
523 ; FMA4-LABEL: test_4f32_fmadd_load:
525 ; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
528 ; AVX512-LABEL: test_4f32_fmadd_load:
530 ; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
532 %x = load <4 x float>, ptr %a0
533 %y = fmul <4 x float> %x, %a1
534 %res = fadd <4 x float> %y, %a2
; (load(a0) * a1) - a2 on 2 x f64, where one multiplicand is loaded from the
; pointer argument. Every run is expected to fold the load into the fused
; multiply-subtract as its memory operand ("mem" in the decoded operand comment).
538 define <2 x double> @test_2f64_fmsub_load(ptr %a0, <2 x double> %a1, <2 x double> %a2) {
539 ; FMA-LABEL: test_2f64_fmsub_load:
541 ; FMA-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
544 ; FMA4-LABEL: test_2f64_fmsub_load:
546 ; FMA4-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
549 ; AVX512-LABEL: test_2f64_fmsub_load:
551 ; AVX512-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
553 %x = load <2 x double>, ptr %a0
554 %y = fmul <2 x double> %x, %a1
555 %res = fsub <2 x double> %y, %a2
556 ret <2 x double> %res
560 ; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
; (x + 1.0) * y on 4 x f32, with the sum as the first fmul operand.
; Runs without -enable-no-infs-fp-math are expected to keep a separate vaddps
; and vmulps; runs with it are expected to emit one fused multiply-add that
; computes (x * y) + y.
563 define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
564 ; FMA-INFS-LABEL: test_v4f32_mul_add_x_one_y:
566 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
567 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
568 ; FMA-INFS-NEXT: retq
570 ; FMA4-INFS-LABEL: test_v4f32_mul_add_x_one_y:
571 ; FMA4-INFS: # %bb.0:
572 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
573 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
574 ; FMA4-INFS-NEXT: retq
576 ; AVX512-INFS-LABEL: test_v4f32_mul_add_x_one_y:
577 ; AVX512-INFS: # %bb.0:
578 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
579 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
580 ; AVX512-INFS-NEXT: retq
582 ; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
583 ; FMA-NOINFS: # %bb.0:
584 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
585 ; FMA-NOINFS-NEXT: retq
587 ; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
588 ; FMA4-NOINFS: # %bb.0:
589 ; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
590 ; FMA4-NOINFS-NEXT: retq
592 ; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
593 ; AVX512-NOINFS: # %bb.0:
594 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
595 ; AVX512-NOINFS-NEXT: retq
596 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
597 %m = fmul <4 x float> %a, %y
; y * (x + 1.0) on 4 x f32, the commuted form with the sum as the second fmul
; operand. Runs without -enable-no-infs-fp-math are expected to keep a separate
; vaddps and vmulps; runs with it are expected to emit one fused multiply-add
; that computes (x * y) + y.
601 define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
602 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one:
604 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
605 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
606 ; FMA-INFS-NEXT: retq
608 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one:
609 ; FMA4-INFS: # %bb.0:
610 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
611 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
612 ; FMA4-INFS-NEXT: retq
614 ; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one:
615 ; AVX512-INFS: # %bb.0:
616 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
617 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
618 ; AVX512-INFS-NEXT: retq
620 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
621 ; FMA-NOINFS: # %bb.0:
622 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
623 ; FMA-NOINFS-NEXT: retq
625 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
626 ; FMA4-NOINFS: # %bb.0:
627 ; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
628 ; FMA4-NOINFS-NEXT: retq
630 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
631 ; AVX512-NOINFS: # %bb.0:
632 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
633 ; AVX512-NOINFS-NEXT: retq
634 %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
635 %m = fmul <4 x float> %y, %a
; y * (x + C) on 4 x f32 where C is 1.0 in lanes 0 and 2 and undef in lanes 1
; and 3. The expected output matches the all-ones variant. Runs without
; -enable-no-infs-fp-math keep vaddps + vmulps; runs with it emit one fused
; multiply-add that computes (x * y) + y.
639 define <4 x float> @test_v4f32_mul_y_add_x_one_undefs(<4 x float> %x, <4 x float> %y) {
640 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
642 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
643 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
644 ; FMA-INFS-NEXT: retq
646 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
647 ; FMA4-INFS: # %bb.0:
648 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
649 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
650 ; FMA4-INFS-NEXT: retq
652 ; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
653 ; AVX512-INFS: # %bb.0:
654 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
655 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
656 ; AVX512-INFS-NEXT: retq
658 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
659 ; FMA-NOINFS: # %bb.0:
660 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
661 ; FMA-NOINFS-NEXT: retq
663 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
664 ; FMA4-NOINFS: # %bb.0:
665 ; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
666 ; FMA4-NOINFS-NEXT: retq
668 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
669 ; AVX512-NOINFS: # %bb.0:
670 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
671 ; AVX512-NOINFS-NEXT: retq
672 %a = fadd <4 x float> %x, <float 1.0, float undef, float 1.0, float undef>
673 %m = fmul <4 x float> %y, %a
; (x + -1.0) * y on 4 x f32, with the sum as the first fmul operand.
; Runs without -enable-no-infs-fp-math are expected to keep a separate vaddps
; and vmulps; runs with it are expected to emit one fused multiply-subtract
; that computes (x * y) - y.
677 define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
678 ; FMA-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
680 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
681 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
682 ; FMA-INFS-NEXT: retq
684 ; FMA4-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
685 ; FMA4-INFS: # %bb.0:
686 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
687 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
688 ; FMA4-INFS-NEXT: retq
690 ; AVX512-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
691 ; AVX512-INFS: # %bb.0:
692 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
693 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
694 ; AVX512-INFS-NEXT: retq
696 ; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
697 ; FMA-NOINFS: # %bb.0:
698 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
699 ; FMA-NOINFS-NEXT: retq
701 ; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
702 ; FMA4-NOINFS: # %bb.0:
703 ; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
704 ; FMA4-NOINFS-NEXT: retq
706 ; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
707 ; AVX512-NOINFS: # %bb.0:
708 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
709 ; AVX512-NOINFS-NEXT: retq
710 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
711 %m = fmul <4 x float> %a, %y
; y * (x + -1.0) on 4 x f32, the commuted form with the sum as the second fmul
; operand. Runs without -enable-no-infs-fp-math are expected to keep a separate
; vaddps and vmulps; runs with it are expected to emit one fused
; multiply-subtract that computes (x * y) - y.
715 define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
716 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
718 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
719 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
720 ; FMA-INFS-NEXT: retq
722 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
723 ; FMA4-INFS: # %bb.0:
724 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
725 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
726 ; FMA4-INFS-NEXT: retq
728 ; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
729 ; AVX512-INFS: # %bb.0:
730 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
731 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
732 ; AVX512-INFS-NEXT: retq
734 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
735 ; FMA-NOINFS: # %bb.0:
736 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
737 ; FMA-NOINFS-NEXT: retq
739 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
740 ; FMA4-NOINFS: # %bb.0:
741 ; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
742 ; FMA4-NOINFS-NEXT: retq
744 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
745 ; AVX512-NOINFS: # %bb.0:
746 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
747 ; AVX512-NOINFS-NEXT: retq
748 %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
749 %m = fmul <4 x float> %y, %a
; y * (x + C) on 4 x f32 where C is -1.0 in lanes 1 and 3 and undef in lanes 0
; and 2. The expected output matches the all -1.0 variant. Runs without
; -enable-no-infs-fp-math keep vaddps + vmulps; runs with it emit one fused
; multiply-subtract that computes (x * y) - y.
753 define <4 x float> @test_v4f32_mul_y_add_x_negone_undefs(<4 x float> %x, <4 x float> %y) {
754 ; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
756 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
757 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
758 ; FMA-INFS-NEXT: retq
760 ; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
761 ; FMA4-INFS: # %bb.0:
762 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
763 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
764 ; FMA4-INFS-NEXT: retq
766 ; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
767 ; AVX512-INFS: # %bb.0:
768 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
769 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
770 ; AVX512-INFS-NEXT: retq
772 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
773 ; FMA-NOINFS: # %bb.0:
774 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
775 ; FMA-NOINFS-NEXT: retq
777 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
778 ; FMA4-NOINFS: # %bb.0:
779 ; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
780 ; FMA4-NOINFS-NEXT: retq
782 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
783 ; AVX512-NOINFS: # %bb.0:
784 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
785 ; AVX512-NOINFS-NEXT: retq
786 %a = fadd <4 x float> %x, <float undef, float -1.0, float undef, float -1.0>
787 %m = fmul <4 x float> %y, %a
; (1.0 - x) * y on 4 x f32, with the difference as the first fmul operand.
; Runs without -enable-no-infs-fp-math are expected to materialise the 1.0
; splat and keep a separate vsubps and vmulps; runs with it are expected to
; emit one negated multiply-add that computes -(x * y) + y.
791 define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
792 ; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
794 ; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
795 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
796 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
797 ; FMA-INFS-NEXT: retq
799 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
800 ; FMA4-INFS: # %bb.0:
801 ; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
802 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
803 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
804 ; FMA4-INFS-NEXT: retq
806 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
807 ; AVX512-INFS: # %bb.0:
808 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
809 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
810 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
811 ; AVX512-INFS-NEXT: retq
813 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
814 ; FMA-NOINFS: # %bb.0:
815 ; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
816 ; FMA-NOINFS-NEXT: retq
818 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
819 ; FMA4-NOINFS: # %bb.0:
820 ; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
821 ; FMA4-NOINFS-NEXT: retq
823 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
824 ; AVX512-NOINFS: # %bb.0:
825 ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
826 ; AVX512-NOINFS-NEXT: retq
827 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
828 %m = fmul <4 x float> %s, %y
; y * (1.0 - x) on 4 x f32, the commuted form with the difference as the second
; fmul operand. Runs without -enable-no-infs-fp-math are expected to
; materialise the 1.0 splat and keep a separate vsubps and vmulps; runs with it
; are expected to emit one negated multiply-add that computes -(x * y) + y.
832 define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
833 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
835 ; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
836 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
837 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
838 ; FMA-INFS-NEXT: retq
840 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
841 ; FMA4-INFS: # %bb.0:
842 ; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
843 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
844 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
845 ; FMA4-INFS-NEXT: retq
847 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
848 ; AVX512-INFS: # %bb.0:
849 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
850 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
851 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
852 ; AVX512-INFS-NEXT: retq
854 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
855 ; FMA-NOINFS: # %bb.0:
856 ; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
857 ; FMA-NOINFS-NEXT: retq
859 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
860 ; FMA4-NOINFS: # %bb.0:
861 ; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
862 ; FMA4-NOINFS-NEXT: retq
864 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
865 ; AVX512-NOINFS: # %bb.0:
866 ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
867 ; AVX512-NOINFS-NEXT: retq
868 %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
869 %m = fmul <4 x float> %y, %s
; y * (C - x) on 4 x f32 where C is 1.0 in lanes 0, 2 and 3 and undef in lane 1.
; The expected output matches the all-ones variant. Runs without
; -enable-no-infs-fp-math keep the 1.0 splat, vsubps and vmulps; runs with it
; emit one negated multiply-add that computes -(x * y) + y.
873 define <4 x float> @test_v4f32_mul_y_sub_one_x_undefs(<4 x float> %x, <4 x float> %y) {
874 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
876 ; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
877 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
878 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
879 ; FMA-INFS-NEXT: retq
881 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
882 ; FMA4-INFS: # %bb.0:
883 ; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
884 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
885 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
886 ; FMA4-INFS-NEXT: retq
888 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
889 ; AVX512-INFS: # %bb.0:
890 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
891 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
892 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
893 ; AVX512-INFS-NEXT: retq
895 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
896 ; FMA-NOINFS: # %bb.0:
897 ; FMA-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
898 ; FMA-NOINFS-NEXT: retq
900 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
901 ; FMA4-NOINFS: # %bb.0:
902 ; FMA4-NOINFS-NEXT: vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
903 ; FMA4-NOINFS-NEXT: retq
905 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
906 ; AVX512-NOINFS: # %bb.0:
907 ; AVX512-NOINFS-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
908 ; AVX512-NOINFS-NEXT: retq
909 %s = fsub <4 x float> <float 1.0, float undef, float 1.0, float 1.0>, %x
910 %m = fmul <4 x float> %y, %s
; (-1.0 - x) * y on 4 x f32, with the difference as the first fmul operand.
; Runs without -enable-no-infs-fp-math are expected to materialise the -1.0
; splat and keep a separate vsubps and vmulps; runs with it are expected to
; emit one negated multiply-subtract that computes -(x * y) - y.
914 define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
915 ; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
917 ; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
918 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
919 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
920 ; FMA-INFS-NEXT: retq
922 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
923 ; FMA4-INFS: # %bb.0:
924 ; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
925 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
926 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
927 ; FMA4-INFS-NEXT: retq
929 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
930 ; AVX512-INFS: # %bb.0:
931 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
932 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
933 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
934 ; AVX512-INFS-NEXT: retq
936 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
937 ; FMA-NOINFS: # %bb.0:
938 ; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
939 ; FMA-NOINFS-NEXT: retq
941 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
942 ; FMA4-NOINFS: # %bb.0:
943 ; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
944 ; FMA4-NOINFS-NEXT: retq
946 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
947 ; AVX512-NOINFS: # %bb.0:
948 ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
949 ; AVX512-NOINFS-NEXT: retq
950 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
951 %m = fmul <4 x float> %s, %y
; y * (-1.0 - x) on 4 x f32, the commuted form with the difference as the
; second fmul operand. Runs without -enable-no-infs-fp-math are expected to
; materialise the -1.0 splat and keep a separate vsubps and vmulps; runs with
; it are expected to emit one negated multiply-subtract that computes
; -(x * y) - y.
955 define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
956 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
958 ; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
959 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
960 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
961 ; FMA-INFS-NEXT: retq
963 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
964 ; FMA4-INFS: # %bb.0:
965 ; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
966 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
967 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
968 ; FMA4-INFS-NEXT: retq
970 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
971 ; AVX512-INFS: # %bb.0:
972 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
973 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
974 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
975 ; AVX512-INFS-NEXT: retq
977 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
978 ; FMA-NOINFS: # %bb.0:
979 ; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
980 ; FMA-NOINFS-NEXT: retq
982 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
983 ; FMA4-NOINFS: # %bb.0:
984 ; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
985 ; FMA4-NOINFS-NEXT: retq
987 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
988 ; AVX512-NOINFS: # %bb.0:
989 ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
990 ; AVX512-NOINFS-NEXT: retq
991 %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
992 %m = fmul <4 x float> %y, %s
; y * (C - x) on 4 x f32 where C is -1.0 in lanes 0, 1 and 3 and undef in
; lane 2. The expected output matches the all -1.0 variant. Runs without
; -enable-no-infs-fp-math keep the -1.0 splat, vsubps and vmulps; runs with it
; emit one negated multiply-subtract that computes -(x * y) - y.
996 define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x float> %y) {
997 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
999 ; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
1000 ; FMA-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
1001 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1002 ; FMA-INFS-NEXT: retq
1004 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
1005 ; FMA4-INFS: # %bb.0:
1006 ; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
1007 ; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
1008 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1009 ; FMA4-INFS-NEXT: retq
1011 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
1012 ; AVX512-INFS: # %bb.0:
1013 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
1014 ; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0
1015 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1016 ; AVX512-INFS-NEXT: retq
1018 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
1019 ; FMA-NOINFS: # %bb.0:
1020 ; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
1021 ; FMA-NOINFS-NEXT: retq
1023 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
1024 ; FMA4-NOINFS: # %bb.0:
1025 ; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
1026 ; FMA4-NOINFS-NEXT: retq
1028 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
1029 ; AVX512-NOINFS: # %bb.0:
1030 ; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
1031 ; AVX512-NOINFS-NEXT: retq
1032 %s = fsub <4 x float> <float -1.0, float -1.0, float undef, float -1.0>, %x
1033 %m = fmul <4 x float> %y, %s
; (x - 1.0) * y on <4 x float>.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect an add of
; a constant-pool operand followed by a separate multiply; runs with that
; option (the *-NOINFS prefixes) expect a single fused multiply-subtract.
1037 define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) {
1038 ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
1039 ; FMA-INFS: # %bb.0:
1040 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1041 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1042 ; FMA-INFS-NEXT: retq
1044 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
1045 ; FMA4-INFS: # %bb.0:
1046 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1047 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1048 ; FMA4-INFS-NEXT: retq
1050 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y:
1051 ; AVX512-INFS: # %bb.0:
1052 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1053 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1054 ; AVX512-INFS-NEXT: retq
1056 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
1057 ; FMA-NOINFS: # %bb.0:
1058 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1059 ; FMA-NOINFS-NEXT: retq
1061 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
1062 ; FMA4-NOINFS: # %bb.0:
1063 ; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
1064 ; FMA4-NOINFS-NEXT: retq
1066 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y:
1067 ; AVX512-NOINFS: # %bb.0:
1068 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1069 ; AVX512-NOINFS-NEXT: retq
1070 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
1071 %m = fmul <4 x float> %s, %y
; y * (x - 1.0) on <4 x float>: the fmul operands are commuted relative to
; test_v4f32_mul_sub_x_one_y.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect an add of
; a constant-pool operand followed by a separate multiply; runs with that
; option (the *-NOINFS prefixes) expect a single fused multiply-subtract.
1075 define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) {
1076 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
1077 ; FMA-INFS: # %bb.0:
1078 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1079 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1080 ; FMA-INFS-NEXT: retq
1082 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
1083 ; FMA4-INFS: # %bb.0:
1084 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1085 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1086 ; FMA4-INFS-NEXT: retq
1088 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one:
1089 ; AVX512-INFS: # %bb.0:
1090 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1091 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1092 ; AVX512-INFS-NEXT: retq
1094 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
1095 ; FMA-NOINFS: # %bb.0:
1096 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1097 ; FMA-NOINFS-NEXT: retq
1099 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
1100 ; FMA4-NOINFS: # %bb.0:
1101 ; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
1102 ; FMA4-NOINFS-NEXT: retq
1104 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one:
1105 ; AVX512-NOINFS: # %bb.0:
1106 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1107 ; AVX512-NOINFS-NEXT: retq
1108 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
1109 %m = fmul <4 x float> %y, %s
; y * (x - 1.0) on <4 x float>, where lane 3 of the 1.0 constant vector is
; undef. The expected code has the same shape as the all-defined variant:
; add + multiply without -enable-no-infs-fp-math (the *-INFS prefixes), one
; fused multiply-subtract with it (the *-NOINFS prefixes).
1113 define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float> %y) {
1114 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1115 ; FMA-INFS: # %bb.0:
1116 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1117 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1118 ; FMA-INFS-NEXT: retq
1120 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1121 ; FMA4-INFS: # %bb.0:
1122 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1123 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1124 ; FMA4-INFS-NEXT: retq
1126 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1127 ; AVX512-INFS: # %bb.0:
1128 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1129 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1130 ; AVX512-INFS-NEXT: retq
1132 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1133 ; FMA-NOINFS: # %bb.0:
1134 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1135 ; FMA-NOINFS-NEXT: retq
1137 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1138 ; FMA4-NOINFS: # %bb.0:
1139 ; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
1140 ; FMA4-NOINFS-NEXT: retq
1142 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs:
1143 ; AVX512-NOINFS: # %bb.0:
1144 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
1145 ; AVX512-NOINFS-NEXT: retq
1146 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float undef>
1147 %m = fmul <4 x float> %y, %s
; (x - (-1.0)) * y on <4 x float>.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect an add of
; a constant-pool operand followed by a separate multiply; runs with that
; option (the *-NOINFS prefixes) expect a single fused multiply-add.
1151 define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) {
1152 ; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1153 ; FMA-INFS: # %bb.0:
1154 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1155 ; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1156 ; FMA-INFS-NEXT: retq
1158 ; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1159 ; FMA4-INFS: # %bb.0:
1160 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1161 ; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1162 ; FMA4-INFS-NEXT: retq
1164 ; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1165 ; AVX512-INFS: # %bb.0:
1166 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1167 ; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0
1168 ; AVX512-INFS-NEXT: retq
1170 ; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1171 ; FMA-NOINFS: # %bb.0:
1172 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1173 ; FMA-NOINFS-NEXT: retq
1175 ; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1176 ; FMA4-NOINFS: # %bb.0:
1177 ; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
1178 ; FMA4-NOINFS-NEXT: retq
1180 ; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y:
1181 ; AVX512-NOINFS: # %bb.0:
1182 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1183 ; AVX512-NOINFS-NEXT: retq
1184 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
1185 %m = fmul <4 x float> %s, %y
; y * (x - (-1.0)) on <4 x float>: the fmul operands are commuted relative to
; test_v4f32_mul_sub_x_negone_y.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect an add of
; a constant-pool operand followed by a separate multiply; runs with that
; option (the *-NOINFS prefixes) expect a single fused multiply-add.
1189 define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) {
1190 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1191 ; FMA-INFS: # %bb.0:
1192 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1193 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1194 ; FMA-INFS-NEXT: retq
1196 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1197 ; FMA4-INFS: # %bb.0:
1198 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1199 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1200 ; FMA4-INFS-NEXT: retq
1202 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1203 ; AVX512-INFS: # %bb.0:
1204 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1205 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1206 ; AVX512-INFS-NEXT: retq
1208 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1209 ; FMA-NOINFS: # %bb.0:
1210 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1211 ; FMA-NOINFS-NEXT: retq
1213 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1214 ; FMA4-NOINFS: # %bb.0:
1215 ; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
1216 ; FMA4-NOINFS-NEXT: retq
1218 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone:
1219 ; AVX512-NOINFS: # %bb.0:
1220 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1221 ; AVX512-NOINFS-NEXT: retq
1222 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
1223 %m = fmul <4 x float> %y, %s
; y * (x - (-1.0)) on <4 x float>, where lane 0 of the -1.0 constant vector is
; undef. The expected code has the same shape as the all-defined variant:
; add + multiply without -enable-no-infs-fp-math (the *-INFS prefixes), one
; fused multiply-add with it (the *-NOINFS prefixes).
1227 define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x float> %y) {
1228 ; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1229 ; FMA-INFS: # %bb.0:
1230 ; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1231 ; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1232 ; FMA-INFS-NEXT: retq
1234 ; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1235 ; FMA4-INFS: # %bb.0:
1236 ; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1237 ; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1238 ; FMA4-INFS-NEXT: retq
1240 ; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1241 ; AVX512-INFS: # %bb.0:
1242 ; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1243 ; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0
1244 ; AVX512-INFS-NEXT: retq
1246 ; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1247 ; FMA-NOINFS: # %bb.0:
1248 ; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1249 ; FMA-NOINFS-NEXT: retq
1251 ; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1252 ; FMA4-NOINFS: # %bb.0:
1253 ; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
1254 ; FMA4-NOINFS-NEXT: retq
1256 ; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs:
1257 ; AVX512-NOINFS: # %bb.0:
1258 ; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
1259 ; AVX512-NOINFS-NEXT: retq
1260 %s = fsub <4 x float> %x, <float undef, float -1.0, float -1.0, float -1.0>
1261 %m = fmul <4 x float> %y, %s
1266 ; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
; Scalar float interpolation: (x * t) + (y * (1.0 - t)), every operation
; carrying the nsz flag.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect a
; subtract, a multiply and one fused multiply-add; runs with that option (the
; *-NOINFS prefixes) expect just two chained fused multiply-subtracts.
1269 define float @test_f32_interp(float %x, float %y, float %t) {
1270 ; FMA-INFS-LABEL: test_f32_interp:
1271 ; FMA-INFS: # %bb.0:
1272 ; FMA-INFS-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
1273 ; FMA-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3
1274 ; FMA-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1
1275 ; FMA-INFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1276 ; FMA-INFS-NEXT: retq
1278 ; FMA4-INFS-LABEL: test_f32_interp:
1279 ; FMA4-INFS: # %bb.0:
1280 ; FMA4-INFS-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
1281 ; FMA4-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3
1282 ; FMA4-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1
1283 ; FMA4-INFS-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
1284 ; FMA4-INFS-NEXT: retq
1286 ; AVX512-INFS-LABEL: test_f32_interp:
1287 ; AVX512-INFS: # %bb.0:
1288 ; AVX512-INFS-NEXT: vmovss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
1289 ; AVX512-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3
1290 ; AVX512-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1
1291 ; AVX512-INFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1292 ; AVX512-INFS-NEXT: retq
1294 ; FMA-NOINFS-LABEL: test_f32_interp:
1295 ; FMA-NOINFS: # %bb.0:
1296 ; FMA-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1297 ; FMA-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
1298 ; FMA-NOINFS-NEXT: retq
1300 ; FMA4-NOINFS-LABEL: test_f32_interp:
1301 ; FMA4-NOINFS: # %bb.0:
1302 ; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1303 ; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
1304 ; FMA4-NOINFS-NEXT: retq
1306 ; AVX512-NOINFS-LABEL: test_f32_interp:
1307 ; AVX512-NOINFS: # %bb.0:
1308 ; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1309 ; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
1310 ; AVX512-NOINFS-NEXT: retq
1311 %t1 = fsub nsz float 1.0, %t
1312 %tx = fmul nsz float %x, %t
1313 %ty = fmul nsz float %y, %t1
1314 %r = fadd nsz float %tx, %ty
; <4 x float> interpolation: (x * t) + (y * (1.0 - t)), every operation
; carrying the nsz flag.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect a
; subtract, a multiply and one fused multiply-add; runs with that option (the
; *-NOINFS prefixes) expect just two chained fused multiply-subtracts.
1318 define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) {
1319 ; FMA-INFS-LABEL: test_v4f32_interp:
1320 ; FMA-INFS: # %bb.0:
1321 ; FMA-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1322 ; FMA-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
1323 ; FMA-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
1324 ; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1325 ; FMA-INFS-NEXT: retq
1327 ; FMA4-INFS-LABEL: test_v4f32_interp:
1328 ; FMA4-INFS: # %bb.0:
1329 ; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1330 ; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
1331 ; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
1332 ; FMA4-INFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
1333 ; FMA4-INFS-NEXT: retq
1335 ; AVX512-INFS-LABEL: test_v4f32_interp:
1336 ; AVX512-INFS: # %bb.0:
1337 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1338 ; AVX512-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3
1339 ; AVX512-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1
1340 ; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1341 ; AVX512-INFS-NEXT: retq
1343 ; FMA-NOINFS-LABEL: test_v4f32_interp:
1344 ; FMA-NOINFS: # %bb.0:
1345 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1346 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
1347 ; FMA-NOINFS-NEXT: retq
1349 ; FMA4-NOINFS-LABEL: test_v4f32_interp:
1350 ; FMA4-NOINFS: # %bb.0:
1351 ; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1352 ; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
1353 ; FMA4-NOINFS-NEXT: retq
1355 ; AVX512-NOINFS-LABEL: test_v4f32_interp:
1356 ; AVX512-NOINFS: # %bb.0:
1357 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1358 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
1359 ; AVX512-NOINFS-NEXT: retq
1360 %t1 = fsub nsz <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
1361 %tx = fmul nsz <4 x float> %x, %t
1362 %ty = fmul nsz <4 x float> %y, %t1
1363 %r = fadd nsz <4 x float> %tx, %ty
; <8 x float> interpolation: (x * t) + (y * (1.0 - t)), every operation
; carrying the nsz flag. The checks use ymm registers.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect a
; subtract, a multiply and one fused multiply-add; runs with that option (the
; *-NOINFS prefixes) expect just two chained fused multiply-subtracts.
1367 define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) {
1368 ; FMA-INFS-LABEL: test_v8f32_interp:
1369 ; FMA-INFS: # %bb.0:
1370 ; FMA-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1371 ; FMA-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
1372 ; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
1373 ; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1374 ; FMA-INFS-NEXT: retq
1376 ; FMA4-INFS-LABEL: test_v8f32_interp:
1377 ; FMA4-INFS: # %bb.0:
1378 ; FMA4-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1379 ; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
1380 ; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
1381 ; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1
1382 ; FMA4-INFS-NEXT: retq
1384 ; AVX512-INFS-LABEL: test_v8f32_interp:
1385 ; AVX512-INFS: # %bb.0:
1386 ; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1387 ; AVX512-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3
1388 ; AVX512-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1
1389 ; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1390 ; AVX512-INFS-NEXT: retq
1392 ; FMA-NOINFS-LABEL: test_v8f32_interp:
1393 ; FMA-NOINFS: # %bb.0:
1394 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
1395 ; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1
1396 ; FMA-NOINFS-NEXT: retq
1398 ; FMA4-NOINFS-LABEL: test_v8f32_interp:
1399 ; FMA4-NOINFS: # %bb.0:
1400 ; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
1401 ; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1
1402 ; FMA4-NOINFS-NEXT: retq
1404 ; AVX512-NOINFS-LABEL: test_v8f32_interp:
1405 ; AVX512-NOINFS: # %bb.0:
1406 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
1407 ; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1
1408 ; AVX512-NOINFS-NEXT: retq
1409 %t1 = fsub nsz <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
1410 %tx = fmul nsz <8 x float> %x, %t
1411 %ty = fmul nsz <8 x float> %y, %t1
1412 %r = fadd nsz <8 x float> %tx, %ty
; Scalar double interpolation: (x * t) + (y * (1.0 - t)), every operation
; carrying the nsz flag.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect a
; subtract, a multiply and one fused multiply-add; runs with that option (the
; *-NOINFS prefixes) expect just two chained fused multiply-subtracts.
1416 define double @test_f64_interp(double %x, double %y, double %t) {
1417 ; FMA-INFS-LABEL: test_f64_interp:
1418 ; FMA-INFS: # %bb.0:
1419 ; FMA-INFS-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0]
1420 ; FMA-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3
1421 ; FMA-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1
1422 ; FMA-INFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1423 ; FMA-INFS-NEXT: retq
1425 ; FMA4-INFS-LABEL: test_f64_interp:
1426 ; FMA4-INFS: # %bb.0:
1427 ; FMA4-INFS-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0]
1428 ; FMA4-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3
1429 ; FMA4-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1
1430 ; FMA4-INFS-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
1431 ; FMA4-INFS-NEXT: retq
1433 ; AVX512-INFS-LABEL: test_f64_interp:
1434 ; AVX512-INFS: # %bb.0:
1435 ; AVX512-INFS-NEXT: vmovsd {{.*#+}} xmm3 = [1.0E+0,0.0E+0]
1436 ; AVX512-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3
1437 ; AVX512-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1
1438 ; AVX512-INFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1439 ; AVX512-INFS-NEXT: retq
1441 ; FMA-NOINFS-LABEL: test_f64_interp:
1442 ; FMA-NOINFS: # %bb.0:
1443 ; FMA-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1444 ; FMA-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
1445 ; FMA-NOINFS-NEXT: retq
1447 ; FMA4-NOINFS-LABEL: test_f64_interp:
1448 ; FMA4-NOINFS: # %bb.0:
1449 ; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1450 ; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
1451 ; FMA4-NOINFS-NEXT: retq
1453 ; AVX512-NOINFS-LABEL: test_f64_interp:
1454 ; AVX512-NOINFS: # %bb.0:
1455 ; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1456 ; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
1457 ; AVX512-NOINFS-NEXT: retq
1458 %t1 = fsub nsz double 1.0, %t
1459 %tx = fmul nsz double %x, %t
1460 %ty = fmul nsz double %y, %t1
1461 %r = fadd nsz double %tx, %ty
; <2 x double> interpolation: (x * t) + (y * (1.0 - t)), every operation
; carrying the nsz flag.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect the 1.0
; splat to be loaded with vmovddup, then a subtract, a multiply and one fused
; multiply-add; runs with that option (the *-NOINFS prefixes) expect just two
; chained fused multiply-subtracts.
1465 define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) {
1466 ; FMA-INFS-LABEL: test_v2f64_interp:
1467 ; FMA-INFS: # %bb.0:
1468 ; FMA-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
1469 ; FMA-INFS-NEXT: # xmm3 = mem[0,0]
1470 ; FMA-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
1471 ; FMA-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
1472 ; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1473 ; FMA-INFS-NEXT: retq
1475 ; FMA4-INFS-LABEL: test_v2f64_interp:
1476 ; FMA4-INFS: # %bb.0:
1477 ; FMA4-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
1478 ; FMA4-INFS-NEXT: # xmm3 = mem[0,0]
1479 ; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
1480 ; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
1481 ; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1
1482 ; FMA4-INFS-NEXT: retq
1484 ; AVX512-INFS-LABEL: test_v2f64_interp:
1485 ; AVX512-INFS: # %bb.0:
1486 ; AVX512-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0]
1487 ; AVX512-INFS-NEXT: # xmm3 = mem[0,0]
1488 ; AVX512-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3
1489 ; AVX512-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1
1490 ; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1
1491 ; AVX512-INFS-NEXT: retq
1493 ; FMA-NOINFS-LABEL: test_v2f64_interp:
1494 ; FMA-NOINFS: # %bb.0:
1495 ; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1496 ; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
1497 ; FMA-NOINFS-NEXT: retq
1499 ; FMA4-NOINFS-LABEL: test_v2f64_interp:
1500 ; FMA4-NOINFS: # %bb.0:
1501 ; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1502 ; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1
1503 ; FMA4-NOINFS-NEXT: retq
1505 ; AVX512-NOINFS-LABEL: test_v2f64_interp:
1506 ; AVX512-NOINFS: # %bb.0:
1507 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1
1508 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1
1509 ; AVX512-NOINFS-NEXT: retq
1510 %t1 = fsub nsz <2 x double> <double 1.0, double 1.0>, %t
1511 %tx = fmul nsz <2 x double> %x, %t
1512 %ty = fmul nsz <2 x double> %y, %t1
1513 %r = fadd nsz <2 x double> %tx, %ty
; <4 x double> interpolation: (x * t) + (y * (1.0 - t)), every operation
; carrying the nsz flag. The checks use ymm registers.
; Runs without -enable-no-infs-fp-math (the *-INFS prefixes) expect a
; subtract, a multiply and one fused multiply-add; runs with that option (the
; *-NOINFS prefixes) expect just two chained fused multiply-subtracts.
1517 define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) {
1518 ; FMA-INFS-LABEL: test_v4f64_interp:
1519 ; FMA-INFS: # %bb.0:
1520 ; FMA-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1521 ; FMA-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
1522 ; FMA-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
1523 ; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1524 ; FMA-INFS-NEXT: retq
1526 ; FMA4-INFS-LABEL: test_v4f64_interp:
1527 ; FMA4-INFS: # %bb.0:
1528 ; FMA4-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1529 ; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
1530 ; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
1531 ; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1
1532 ; FMA4-INFS-NEXT: retq
1534 ; AVX512-INFS-LABEL: test_v4f64_interp:
1535 ; AVX512-INFS: # %bb.0:
1536 ; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
1537 ; AVX512-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3
1538 ; AVX512-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1
1539 ; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
1540 ; AVX512-INFS-NEXT: retq
1542 ; FMA-NOINFS-LABEL: test_v4f64_interp:
1543 ; FMA-NOINFS: # %bb.0:
1544 ; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
1545 ; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1
1546 ; FMA-NOINFS-NEXT: retq
1548 ; FMA4-NOINFS-LABEL: test_v4f64_interp:
1549 ; FMA4-NOINFS: # %bb.0:
1550 ; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
1551 ; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1
1552 ; FMA4-NOINFS-NEXT: retq
1554 ; AVX512-NOINFS-LABEL: test_v4f64_interp:
1555 ; AVX512-NOINFS: # %bb.0:
1556 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1
1557 ; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1
1558 ; AVX512-NOINFS-NEXT: retq
1559 %t1 = fsub nsz <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
1560 %tx = fmul nsz <4 x double> %x, %t
1561 %ty = fmul nsz <4 x double> %y, %t1
1562 %r = fadd nsz <4 x double> %tx, %ty
1567 ; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
; -((a0 * a1) + a2) on <4 x float>, with the negation written as
; "fsub -0.0, x" and nsz on every operation.
; All run lines share one set of checks and expect a single fused negated
; multiply-subtract, -(a0 * a1) - a2.
1570 define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
1571 ; FMA-LABEL: test_v4f32_fneg_fmadd:
1573 ; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1576 ; FMA4-LABEL: test_v4f32_fneg_fmadd:
1578 ; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
1581 ; AVX512-LABEL: test_v4f32_fneg_fmadd:
1583 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1585 %mul = fmul nsz <4 x float> %a0, %a1
1586 %add = fadd nsz <4 x float> %mul, %a2
1587 %neg = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
1588 ret <4 x float> %neg
; -((a0 * a1) - a2) on <4 x double>, with the negation written as
; "fsub -0.0, x" and nsz on every operation.
; All run lines share one set of checks and expect a single fused negated
; multiply-add, -(a0 * a1) + a2.
1591 define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
1592 ; FMA-LABEL: test_v4f64_fneg_fmsub:
1594 ; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
1597 ; FMA4-LABEL: test_v4f64_fneg_fmsub:
1599 ; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
1602 ; AVX512-LABEL: test_v4f64_fneg_fmsub:
1604 ; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
1606 %mul = fmul nsz <4 x double> %a0, %a1
1607 %sub = fsub nsz <4 x double> %mul, %a2
1608 %neg = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
1609 ret <4 x double> %neg
; -(-(a0 * a1) + a2) on <4 x float>: both negations are written as
; "fsub -0.0, x" and every operation carries nsz.
; All run lines share one set of checks and expect a single fused
; multiply-subtract, (a0 * a1) - a2.
1612 define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
1613 ; FMA-LABEL: test_v4f32_fneg_fnmadd:
1615 ; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
1618 ; FMA4-LABEL: test_v4f32_fneg_fnmadd:
1620 ; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
1623 ; AVX512-LABEL: test_v4f32_fneg_fnmadd:
1625 ; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
1627 %mul = fmul nsz <4 x float> %a0, %a1
1628 %neg0 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
1629 %add = fadd nsz <4 x float> %neg0, %a2
1630 %neg1 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
1631 ret <4 x float> %neg1
; -(-(a0 * a1) - a2) on <4 x double>: both negations are written as
; "fsub -0.0, x" and every operation carries nsz.
; All run lines share one set of checks and expect a single fused
; multiply-add, (a0 * a1) + a2.
1634 define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
1635 ; FMA-LABEL: test_v4f64_fneg_fnmsub:
1637 ; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
1640 ; FMA4-LABEL: test_v4f64_fneg_fnmsub:
1642 ; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
1645 ; AVX512-LABEL: test_v4f64_fneg_fnmsub:
1647 ; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
1649 %mul = fmul nsz <4 x double> %a0, %a1
1650 %neg0 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
1651 %sub = fsub nsz <4 x double> %neg0, %a2
1652 %neg1 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
1653 ret <4 x double> %neg1
1657 ; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
; (x * <1,2,3,4>) + (x * <4,3,2,1>) on <4 x float>.
; All run lines expect the two constants to be merged so that only a single
; vmulps by a constant-pool operand remains. The lane-wise sum of the two
; constants is 5.0 everywhere, which matches the {1to4} broadcast operand in
; the AVX512 check.
1660 define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
1661 ; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1663 ; FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1666 ; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1668 ; FMA4-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1671 ; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2:
1673 ; AVX512-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
1675 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
1676 %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0>
1677 %a = fadd <4 x float> %m0, %m1
1682 ; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
; ((x * <1,2,3,4>) * <4,3,2,1>) + y on <4 x float>.
; All run lines expect the two constant multiplies to collapse into one
; constant, leaving a single fused multiply-add whose multiplier is a memory
; operand.
1685 define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 {
1686 ; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1688 ; FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
1691 ; FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1693 ; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
1696 ; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
1698 ; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
1700 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
1701 %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
1702 %a = fadd <4 x float> %m1, %y
1706 ; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
; -(x * y) on scalar double: the fmul carries nsz, the negation is a plain
; "fsub -0.0, m" without flags.
; All run lines expect a register to be zeroed with vxorpd and then used as
; the addend of a fused negated multiply-subtract, -(x * y) - 0.
1708 define double @test_f64_fneg_fmul(double %x, double %y) #0 {
1709 ; FMA-LABEL: test_f64_fneg_fmul:
1711 ; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1712 ; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1715 ; FMA4-LABEL: test_f64_fneg_fmul:
1717 ; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1718 ; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
1721 ; AVX512-LABEL: test_f64_fneg_fmul:
1723 ; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1724 ; AVX512-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1726 %m = fmul nsz double %x, %y
1727 %n = fsub double -0.0, %m
; -(x * y) on <4 x float>: the fmul carries nsz, the negation is a plain
; "fsub -0.0, m" without flags.
; All run lines expect a register to be zeroed with vxorps and then used as
; the addend of a fused negated multiply-subtract, -(x * y) - 0.
1731 define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
1732 ; FMA-LABEL: test_v4f32_fneg_fmul:
1734 ; FMA-NEXT: vxorps %xmm2, %xmm2, %xmm2
1735 ; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1738 ; FMA4-LABEL: test_v4f32_fneg_fmul:
1740 ; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2
1741 ; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
1744 ; AVX512-LABEL: test_v4f32_fneg_fmul:
1746 ; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
1747 ; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
1749 %m = fmul nsz <4 x float> %x, %y
1750 %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m
; -(x * y) on <4 x double>: the fmul carries nsz, the negation is a plain
; "fsub -0.0, m" without flags.
; All run lines expect a register to be zeroed with vxorpd (written on the
; xmm alias) and then used as the ymm addend of a fused negated
; multiply-subtract, -(x * y) - 0.
1754 define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
1755 ; FMA-LABEL: test_v4f64_fneg_fmul:
1757 ; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1758 ; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
1761 ; FMA4-LABEL: test_v4f64_fneg_fmul:
1763 ; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1764 ; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
1767 ; AVX512-LABEL: test_v4f64_fneg_fmul:
1769 ; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1770 ; AVX512-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
1772 %m = fmul nsz <4 x double> %x, %y
1773 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
; Same -(x * y) shape as test_v4f64_fneg_fmul, but the fmul has no nsz flag.
; All run lines expect no fused instruction here: a plain vmulpd followed by
; a vxorpd with a constant-pool operand (presumably the sign-bit mask -- the
; checks do not show the constant's value).
1777 define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 {
1778 ; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz:
1780 ; FMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0
1781 ; FMA-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1784 ; FMA4-LABEL: test_v4f64_fneg_fmul_no_nsz:
1786 ; FMA4-NEXT: vmulpd %ymm1, %ymm0, %ymm0
1787 ; FMA4-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1790 ; AVX512-LABEL: test_v4f64_fneg_fmul_no_nsz:
1792 ; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0
1793 ; AVX512-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
1795 %m = fmul <4 x double> %x, %y
1796 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m
1800 ; ((a*b) + (c*d)) + n1 --> (a*b) + ((c*d) + n1)
; ((a * b) + (c * d)) + n1 on scalar double, every operation marked "fast".
; All run lines expect two chained fused multiply-adds and no standalone
; multiply or add: the first computes (c * d) + n1, the second adds a * b.
1802 define double @fadd_fma_fmul_1(double %a, double %b, double %c, double %d, double %n1) nounwind {
1803 ; FMA-LABEL: fadd_fma_fmul_1:
1805 ; FMA-NEXT: vfmadd213sd {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4
1806 ; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
1809 ; FMA4-LABEL: fadd_fma_fmul_1:
1811 ; FMA4-NEXT: vfmaddsd {{.*#+}} xmm2 = (xmm2 * xmm3) + xmm4
1812 ; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
1815 ; AVX512-LABEL: fadd_fma_fmul_1:
1817 ; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4
1818 ; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
1820 %m1 = fmul fast double %a, %b
1821 %m2 = fmul fast double %c, %d
1822 %a1 = fadd fast double %m1, %m2
1823 %a2 = fadd fast double %a1, %n1
1827 ; Minimum FMF - the 1st fadd is contracted because that combines
1828 ; fmul+fadd as specified by the order of operations; the 2nd fadd
1829 ; requires reassociation to fuse with c*d.
; n0 + ((a * b) + (c * d)) on scalar float with the fewest flags: the fmuls
; have none, the inner fadd has only "contract" and the outer fadd has only
; "reassoc".
; All run lines still expect two chained fused multiply-adds and no
; standalone multiply or add.
1831 define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n0) nounwind {
1832 ; FMA-LABEL: fadd_fma_fmul_fmf:
1834 ; FMA-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4
1835 ; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
1838 ; FMA4-LABEL: fadd_fma_fmul_fmf:
1840 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm2 = (xmm2 * xmm3) + xmm4
1841 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
1844 ; AVX512-LABEL: fadd_fma_fmul_fmf:
1846 ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4
1847 ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
1849 %m1 = fmul float %a, %b
1850 %m2 = fmul float %c, %d
1851 %a1 = fadd contract float %m1, %m2
1852 %a2 = fadd reassoc float %n0, %a1
; n0 + ((a * b) + (c * d)) on scalar float where both fadds carry only
; "contract" (no "reassoc" on the outer fadd).
; All run lines expect only the inner fadd to be fused: a standalone multiply
; for c * d, one fused multiply-add for a * b plus that product, and a
; separate vaddss for n0.
1858 define float @fadd_fma_fmul_2(float %a, float %b, float %c, float %d, float %n0) nounwind {
1859 ; FMA-LABEL: fadd_fma_fmul_2:
1861 ; FMA-NEXT: vmulss %xmm3, %xmm2, %xmm2
1862 ; FMA-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
1863 ; FMA-NEXT: vaddss %xmm2, %xmm4, %xmm0
1866 ; FMA4-LABEL: fadd_fma_fmul_2:
1868 ; FMA4-NEXT: vmulss %xmm3, %xmm2, %xmm2
1869 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
1870 ; FMA4-NEXT: vaddss %xmm0, %xmm4, %xmm0
1873 ; AVX512-LABEL: fadd_fma_fmul_2:
1875 ; AVX512-NEXT: vmulss %xmm3, %xmm2, %xmm2
1876 ; AVX512-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
1877 ; AVX512-NEXT: vaddss %xmm2, %xmm4, %xmm0
1879 %m1 = fmul float %a, %b
1880 %m2 = fmul float %c, %d
1881 %a1 = fadd contract float %m1, %m2
1882 %a2 = fadd contract float %n0, %a1
1886 ; The final fadd can be folded with either one of the leading fmuls.
; ((x1 * x2) + (x3 * x4)) + ((x5 * x6) + (x7 * x8)) on <2 x double>, every
; operation marked "fast".
; All run lines expect one standalone vmulpd followed by three chained fused
; multiply-adds. The FMA and AVX512 checks accumulate in xmm2 and therefore
; end with a vmovapd into xmm0; the FMA4 checks accumulate directly in xmm0.
1888 define <2 x double> @fadd_fma_fmul_3(<2 x double> %x1, <2 x double> %x2, <2 x double> %x3, <2 x double> %x4, <2 x double> %x5, <2 x double> %x6, <2 x double> %x7, <2 x double> %x8) nounwind {
1889 ; FMA-LABEL: fadd_fma_fmul_3:
1891 ; FMA-NEXT: vmulpd %xmm3, %xmm2, %xmm2
1892 ; FMA-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
1893 ; FMA-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm7 * xmm6) + xmm2
1894 ; FMA-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm5 * xmm4) + xmm2
1895 ; FMA-NEXT: vmovapd %xmm2, %xmm0
1898 ; FMA4-LABEL: fadd_fma_fmul_3:
1900 ; FMA4-NEXT: vmulpd %xmm3, %xmm2, %xmm2
1901 ; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
1902 ; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm6 * xmm7) + xmm0
1903 ; FMA4-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm4 * xmm5) + xmm0
1906 ; AVX512-LABEL: fadd_fma_fmul_3:
1908 ; AVX512-NEXT: vmulpd %xmm3, %xmm2, %xmm2
1909 ; AVX512-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
1910 ; AVX512-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm7 * xmm6) + xmm2
1911 ; AVX512-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm5 * xmm4) + xmm2
1912 ; AVX512-NEXT: vmovapd %xmm2, %xmm0
1914 %m1 = fmul fast <2 x double> %x1, %x2
1915 %m2 = fmul fast <2 x double> %x3, %x4
1916 %m3 = fmul fast <2 x double> %x5, %x6
1917 %m4 = fmul fast <2 x double> %x7, %x8
1918 %a1 = fadd fast <2 x double> %m1, %m2
1919 %a2 = fadd fast <2 x double> %m3, %m4
1920 %a3 = fadd fast <2 x double> %a1, %a2
1921 ret <2 x double> %a3
; Builds n0 + ((a * b) + (c * d)) with every operation marked 'fast', while the
; first product %m1 is also stored through %p. The checks shown for every
; prefix expect a standalone vmulss whose result is stored to (%rdi), one fused
; multiply-add that adds that stored product, and a trailing vaddss.
1926 define float @fadd_fma_fmul_extra_use_1(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
1927 ; FMA-LABEL: fadd_fma_fmul_extra_use_1:
1929 ; FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
1930 ; FMA-NEXT: vmovss %xmm0, (%rdi)
1931 ; FMA-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm0
1932 ; FMA-NEXT: vaddss %xmm2, %xmm4, %xmm0
1935 ; FMA4-LABEL: fadd_fma_fmul_extra_use_1:
1937 ; FMA4-NEXT: vmulss %xmm1, %xmm0, %xmm0
1938 ; FMA4-NEXT: vmovss %xmm0, (%rdi)
1939 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm2 * xmm3) + xmm0
1940 ; FMA4-NEXT: vaddss %xmm0, %xmm4, %xmm0
1943 ; AVX512-LABEL: fadd_fma_fmul_extra_use_1:
1945 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
1946 ; AVX512-NEXT: vmovss %xmm0, (%rdi)
1947 ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm0
1948 ; AVX512-NEXT: vaddss %xmm2, %xmm4, %xmm0
1950 %m1 = fmul fast float %a, %b
; Extra use: the first product itself is written to memory.
1951 store float %m1, ptr %p
1952 %m2 = fmul fast float %c, %d
1953 %a1 = fadd fast float %m1, %m2
1954 %a2 = fadd fast float %n0, %a1
; Builds n0 + ((a * b) + (c * d)) with every operation marked 'fast', while the
; second product %m2 is also stored through %p. The checks shown for every
; prefix expect a standalone vmulss whose result is stored to (%rdi), one fused
; multiply-add that adds that stored product, and a trailing vaddss.
1960 define float @fadd_fma_fmul_extra_use_2(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
1961 ; FMA-LABEL: fadd_fma_fmul_extra_use_2:
1963 ; FMA-NEXT: vmulss %xmm3, %xmm2, %xmm2
1964 ; FMA-NEXT: vmovss %xmm2, (%rdi)
1965 ; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
1966 ; FMA-NEXT: vaddss %xmm0, %xmm4, %xmm0
1969 ; FMA4-LABEL: fadd_fma_fmul_extra_use_2:
1971 ; FMA4-NEXT: vmulss %xmm3, %xmm2, %xmm2
1972 ; FMA4-NEXT: vmovss %xmm2, (%rdi)
1973 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
1974 ; FMA4-NEXT: vaddss %xmm0, %xmm4, %xmm0
1977 ; AVX512-LABEL: fadd_fma_fmul_extra_use_2:
1979 ; AVX512-NEXT: vmulss %xmm3, %xmm2, %xmm2
1980 ; AVX512-NEXT: vmovss %xmm2, (%rdi)
1981 ; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
1982 ; AVX512-NEXT: vaddss %xmm0, %xmm4, %xmm0
1984 %m1 = fmul fast float %a, %b
1985 %m2 = fmul fast float %c, %d
; Extra use: the second product itself is written to memory.
1986 store float %m2, ptr %p
1987 %a1 = fadd fast float %m1, %m2
1988 %a2 = fadd fast float %n0, %a1
; Builds n0 + ((a * b) + (c * d)) with every operation marked 'fast', while the
; intermediate sum %a1 is also stored through %p. The checks shown for every
; prefix expect a standalone vmulss, one fused multiply-add whose result is
; stored to (%rdi), and a trailing vaddss.
1994 define float @fadd_fma_fmul_extra_use_3(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
1995 ; FMA-LABEL: fadd_fma_fmul_extra_use_3:
1997 ; FMA-NEXT: vmulss %xmm3, %xmm2, %xmm2
1998 ; FMA-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
1999 ; FMA-NEXT: vmovss %xmm2, (%rdi)
2000 ; FMA-NEXT: vaddss %xmm2, %xmm4, %xmm0
2003 ; FMA4-LABEL: fadd_fma_fmul_extra_use_3:
2005 ; FMA4-NEXT: vmulss %xmm3, %xmm2, %xmm2
2006 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
2007 ; FMA4-NEXT: vmovss %xmm0, (%rdi)
2008 ; FMA4-NEXT: vaddss %xmm0, %xmm4, %xmm0
2011 ; AVX512-LABEL: fadd_fma_fmul_extra_use_3:
2013 ; AVX512-NEXT: vmulss %xmm3, %xmm2, %xmm2
2014 ; AVX512-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
2015 ; AVX512-NEXT: vmovss %xmm2, (%rdi)
2016 ; AVX512-NEXT: vaddss %xmm2, %xmm4, %xmm0
2018 %m1 = fmul fast float %a, %b
2019 %m2 = fmul fast float %c, %d
2020 %a1 = fadd fast float %m1, %m2
; Extra use: the sum of the two products is written to memory.
2021 store float %a1, ptr %p
2022 %a2 = fadd fast float %n0, %a1
; Attribute group #0 sets the "unsafe-fp-math" function attribute to "true".
; NOTE(review): none of the functions visible in this part of the file
; reference #0 - confirm it is still used elsewhere in the file.
2026 attributes #0 = { "unsafe-fp-math"="true" }