; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_256 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma,+avx512f | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_512 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=FMA4 %s
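; This test checks the fusing of MUL + SUB/ADD to FMSUBADD: an fmul whose
; result feeds both an fadd and an fsub, with the two results interleaved by a
; shufflevector (even lanes from the add, odd lanes from the sub).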
; 128-bit double case: %A * %B feeds both an fadd and an fsub with %C, and the
; shuffle keeps lane 0 of the add and lane 1 of the sub. All three llc
; configurations are expected to emit a single fmsubadd on xmm registers.
define <2 x double> @mul_subadd_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd128:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_pd128:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd128:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <2 x double> %A, %B
  %Sub = fsub <2 x double> %AB, %C
  %Add = fadd <2 x double> %AB, %C
  ; Mask index 0 picks %Add[0]; index 3 picks %Sub[1].
  %subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %subadd
}
; 128-bit float case: same mul + add/sub + interleaving shuffle as the double
; variant, over four lanes. Both FMA3 configurations share one expectation
; (vfmsubadd213ps on xmm); the FMA4 configuration expects vfmsubaddps.
define <4 x float> @mul_subadd_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps128:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213ps %xmm2, %xmm1, %xmm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps128:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <4 x float> %A, %B
  %Sub = fsub <4 x float> %AB, %C
  %Add = fadd <4 x float> %AB, %C
  ; Even result lanes come from %Add (indices 0, 2); odd lanes from %Sub
  ; (indices 5, 7 address elements 1 and 3 of the second operand).
  %subadd = shufflevector <4 x float> %Add, <4 x float> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %subadd
}
; 256-bit double case: mul + add/sub + interleaving shuffle over four lanes.
; The expected output is a single fmsubadd on ymm registers for every
; configuration (vfmsubadd213pd with FMA3, vfmsubaddpd with FMA4).
define <4 x double> @mul_subadd_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
; FMA3-LABEL: mul_subadd_pd256:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213pd %ymm2, %ymm1, %ymm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd256:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <4 x double> %A, %B
  %Sub = fsub <4 x double> %AB, %C
  %Add = fadd <4 x double> %AB, %C
  ; Even result lanes come from %Add (indices 0, 2); odd lanes from %Sub
  ; (indices 5, 7 address elements 1 and 3 of the second operand).
  %subadd = shufflevector <4 x double> %Add, <4 x double> %Sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %subadd
}
; 256-bit float case: mul + add/sub + interleaving shuffle over eight lanes.
; The expected output is a single fmsubadd on ymm registers for every
; configuration (vfmsubadd213ps with FMA3, vfmsubaddps with FMA4).
define <8 x float> @mul_subadd_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
; FMA3-LABEL: mul_subadd_ps256:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vfmsubadd213ps %ymm2, %ymm1, %ymm0
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps256:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
; FMA4-NEXT:    retq
entry:
  %AB = fmul <8 x float> %A, %B
  %Sub = fsub <8 x float> %AB, %C
  %Add = fadd <8 x float> %AB, %C
  ; Even result lanes come from %Add (indices 0..6); odd lanes from %Sub
  ; (indices 9..15 address the odd elements of the second operand).
  %subadd = shufflevector <8 x float> %Add, <8 x float> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x float> %subadd
}
; 512-bit double case: mul + add/sub + interleaving shuffle over eight lanes.
; Without avx512f (FMA3_256 and the FMA4 configuration) the expectation is two
; 256-bit fmsubadd instructions on ymm register pairs; with avx512f (FMA3_512)
; it is a single vfmsubadd213pd on zmm registers.
define <8 x double> @mul_subadd_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
; FMA3_256-LABEL: mul_subadd_pd512:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213pd %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT:    vfmsubadd213pd %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_pd512:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213pd %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_pd512:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
entry:
  %AB = fmul <8 x double> %A, %B
  %Sub = fsub <8 x double> %AB, %C
  %Add = fadd <8 x double> %AB, %C
  ; Even result lanes come from %Add (indices 0..6); odd lanes from %Sub
  ; (indices 9..15 address the odd elements of the second operand).
  %subadd = shufflevector <8 x double> %Add, <8 x double> %Sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %subadd
}
; 512-bit float case: mul + add/sub + interleaving shuffle over sixteen lanes.
; Without avx512f (FMA3_256 and the FMA4 configuration) the expectation is two
; 256-bit fmsubadd instructions on ymm register pairs; with avx512f (FMA3_512)
; it is a single vfmsubadd213ps on zmm registers.
define <16 x float> @mul_subadd_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
; FMA3_256-LABEL: mul_subadd_ps512:
; FMA3_256:       # %bb.0: # %entry
; FMA3_256-NEXT:    vfmsubadd213ps %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT:    vfmsubadd213ps %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT:    retq
;
; FMA3_512-LABEL: mul_subadd_ps512:
; FMA3_512:       # %bb.0: # %entry
; FMA3_512-NEXT:    vfmsubadd213ps %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_ps512:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vfmsubaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
entry:
  %AB = fmul <16 x float> %A, %B
  %Sub = fsub <16 x float> %AB, %C
  %Add = fadd <16 x float> %AB, %C
  ; Even result lanes come from %Add (indices 0..14); odd lanes from %Sub
  ; (indices 17..31 address the odd elements of the second operand).
  %subadd = shufflevector <16 x float> %Add, <16 x float> %Sub, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %subadd
}
; This should not be matched to fmsubadd because the mul is on the wrong side
; of the fsub: %Sub is computed as %C - %AB instead of %AB - %C. The expected
; output therefore keeps the separate vmulpd, vsubpd and vaddpd and combines
; the two results with a vblendpd.
define <2 x double> @mul_subadd_bad_commute(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
; FMA3-LABEL: mul_subadd_bad_commute:
; FMA3:       # %bb.0: # %entry
; FMA3-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; FMA3-NEXT:    vsubpd %xmm0, %xmm2, %xmm1
; FMA3-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; FMA3-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA3-NEXT:    retq
;
; FMA4-LABEL: mul_subadd_bad_commute:
; FMA4:       # %bb.0: # %entry
; FMA4-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    vsubpd %xmm0, %xmm2, %xmm1
; FMA4-NEXT:    vaddpd %xmm2, %xmm0, %xmm0
; FMA4-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; FMA4-NEXT:    retq
entry:
  %AB = fmul <2 x double> %A, %B
  ; Operands deliberately swapped relative to the fusable tests.
  %Sub = fsub <2 x double> %C, %AB
  %Add = fadd <2 x double> %AB, %C
  ; Mask index 0 picks %Add[0]; index 3 picks %Sub[1].
  %subadd = shufflevector <2 x double> %Add, <2 x double> %Sub, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %subadd
}
; Attribute group referenced as #0 by the test functions in this file.
; NOTE(review): "unsafe-fp-math" is presumably what lets the backend contract
; the separate fmul and fadd/fsub into fused instructions - confirm before
; changing or removing it.
attributes #0 = { nounwind "unsafe-fp-math"="true" }