1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_256 %s
3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma,+avx512f | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_512 %s
4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=FMA4 %s
6 ; This test checks the fusing of MUL + SUB/ADD to FMSUBADD.
; 128-bit <2 x double>: the shuffle mask <0,3> takes even lanes from the
; fadd and odd lanes from the fsub, so the mul+sub/add trio should fuse
; into a single vfmsubadd (FMA3: 213 form; FMA4: 4-operand form).
8 define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
9 ; FMA3-LABEL: mul_subadd_pd128:
10 ; FMA3: # %bb.0: # %entry
11 ; FMA3-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2
14 ; FMA4-LABEL: mul_subadd_pd128:
15 ; FMA4: # %bb.0: # %entry
16 ; FMA4-NEXT: vfmsubaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
19 %AB = fmul <2 x double> %A, %B
20 %Sub = fsub <2 x double> %AB, %C
21 %Add = fadd <2 x double> %AB, %C
22 %subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
23 ret <2 x double> %subadd
; 128-bit <4 x float> variant: mask <0,5,2,7> selects fadd results in even
; lanes and fsub results in odd lanes, matching the subadd fusion pattern.
26 define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
27 ; FMA3-LABEL: mul_subadd_ps128:
28 ; FMA3: # %bb.0: # %entry
29 ; FMA3-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2
32 ; FMA4-LABEL: mul_subadd_ps128:
33 ; FMA4: # %bb.0: # %entry
34 ; FMA4-NEXT: vfmsubaddps {{.*#+}} xmm0 = (xmm0 * xmm1) -/+ xmm2
37 %AB = fmul <4 x float> %A, %B
38 %Sub = fsub <4 x float> %AB, %C
39 %Add = fadd <4 x float> %AB, %C
40 %subadd = shufflevector <4 x float> %Add, <4 x float> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
41 ret <4 x float> %subadd
; 256-bit <4 x double> variant: same even-add/odd-sub shuffle pattern,
; expected to fuse into a single ymm vfmsubadd on both FMA3 and FMA4.
44 define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
45 ; FMA3-LABEL: mul_subadd_pd256:
46 ; FMA3: # %bb.0: # %entry
47 ; FMA3-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2
50 ; FMA4-LABEL: mul_subadd_pd256:
51 ; FMA4: # %bb.0: # %entry
52 ; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
55 %AB = fmul <4 x double> %A, %B
56 %Sub = fsub <4 x double> %AB, %C
57 %Add = fadd <4 x double> %AB, %C
58 %subadd = shufflevector <4 x double> %Add, <4 x double> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
59 ret <4 x double> %subadd
; 256-bit <8 x float> variant: mask <0,9,2,11,4,13,6,15> keeps the fadd in
; even lanes and the fsub in odd lanes; expects a single ymm vfmsubadd.
62 define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
63 ; FMA3-LABEL: mul_subadd_ps256:
64 ; FMA3: # %bb.0: # %entry
65 ; FMA3-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2
68 ; FMA4-LABEL: mul_subadd_ps256:
69 ; FMA4: # %bb.0: # %entry
70 ; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm1) -/+ ymm2
73 %AB = fmul <8 x float> %A, %B
74 %Sub = fsub <8 x float> %AB, %C
75 %Add = fadd <8 x float> %AB, %C
76 %subadd = shufflevector <8 x float> %Add, <8 x float> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
77 ret <8 x float> %subadd
; 512-bit <8 x double> variant. Without AVX-512 (FMA3_256, FMA4) the op is
; legalized as two 256-bit halves, so two ymm vfmsubadd instructions are
; expected; with +avx512f (FMA3_512) a single zmm vfmsubadd suffices.
80 define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
81 ; FMA3_256-LABEL: mul_subadd_pd512:
82 ; FMA3_256: # %bb.0: # %entry
83 ; FMA3_256-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) -/+ ymm4
84 ; FMA3_256-NEXT: vfmsubadd213pd {{.*#+}} ymm1 = (ymm3 * ymm1) -/+ ymm5
87 ; FMA3_512-LABEL: mul_subadd_pd512:
88 ; FMA3_512: # %bb.0: # %entry
89 ; FMA3_512-NEXT: vfmsubadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) -/+ zmm2
92 ; FMA4-LABEL: mul_subadd_pd512:
93 ; FMA4: # %bb.0: # %entry
94 ; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm4
95 ; FMA4-NEXT: vfmsubaddpd {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
98 %AB = fmul <8 x double> %A, %B
99 %Sub = fsub <8 x double> %AB, %C
100 %Add = fadd <8 x double> %AB, %C
101 %subadd = shufflevector <8 x double> %Add, <8 x double> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
102 ret <8 x double> %subadd
; 512-bit <16 x float> variant: same split-vs-single expectation as the
; pd512 case — two ymm ops without AVX-512, one zmm op with +avx512f.
105 define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
106 ; FMA3_256-LABEL: mul_subadd_ps512:
107 ; FMA3_256: # %bb.0: # %entry
108 ; FMA3_256-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) -/+ ymm4
109 ; FMA3_256-NEXT: vfmsubadd213ps {{.*#+}} ymm1 = (ymm3 * ymm1) -/+ ymm5
110 ; FMA3_256-NEXT: retq
112 ; FMA3_512-LABEL: mul_subadd_ps512:
113 ; FMA3_512: # %bb.0: # %entry
114 ; FMA3_512-NEXT: vfmsubadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) -/+ zmm2
115 ; FMA3_512-NEXT: retq
117 ; FMA4-LABEL: mul_subadd_ps512:
118 ; FMA4: # %bb.0: # %entry
119 ; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm4
120 ; FMA4-NEXT: vfmsubaddps {{.*#+}} ymm1 = (ymm1 * ymm3) -/+ ymm5
123 %AB = fmul <16 x float> %A, %B
124 %Sub = fsub <16 x float> %AB, %C
125 %Add = fadd <16 x float> %AB, %C
126 %subadd = shufflevector <16 x float> %Add, <16 x float> %Sub, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
127 ret <16 x float> %subadd
130 ; This should not be matched to fmsubadd because the mul is on the wrong side of the fsub.
; Negative test: here the fsub computes %C - %AB (product on the RHS), so
; the pattern is NOT a valid fmsubadd and the checks expect the unfused
; sequence: separate vmulpd, vsubpd, vaddpd, then a vblendpd to interleave.
131 define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
132 ; FMA3-LABEL: mul_subadd_bad_commute:
133 ; FMA3: # %bb.0: # %entry
134 ; FMA3-NEXT: vmulpd %xmm1, %xmm0, %xmm0
135 ; FMA3-NEXT: vsubpd %xmm0, %xmm2, %xmm1
136 ; FMA3-NEXT: vaddpd %xmm2, %xmm0, %xmm0
137 ; FMA3-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
140 ; FMA4-LABEL: mul_subadd_bad_commute:
141 ; FMA4: # %bb.0: # %entry
142 ; FMA4-NEXT: vmulpd %xmm1, %xmm0, %xmm0
143 ; FMA4-NEXT: vsubpd %xmm0, %xmm2, %xmm1
144 ; FMA4-NEXT: vaddpd %xmm2, %xmm0, %xmm0
145 ; FMA4-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
148 %AB = fmul <2 x double> %A, %B
149 %Sub = fsub <2 x double> %C, %AB
150 %Add = fadd <2 x double> %AB, %C
151 %subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
152 ret <2 x double> %subadd
; All test functions carry #0. NOTE(review): "unsafe-fp-math"="true"
; presumably licenses the backend to contract the separate fmul/fsub/fadd
; into the fused subadd forms checked above — confirm against the
; X86 fmaddsub/fmsubadd combine's fast-math requirements.
155 attributes #0 = { nounwind "unsafe-fp-math"="true" }