Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGen / X86 / fma4-builtins.c
blob94dcaf61f2681aaeaa540ba54bbf10c185a088ff
1 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s
2 // RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +fma4 -emit-llvm -o - -Wall -Werror | FileCheck %s
5 #include <x86intrin.h>
7 __m128 test_mm_macc_ps(__m128 a, __m128 b, __m128 c) {
8 // CHECK-LABEL: test_mm_macc_ps
9 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
10 return _mm_macc_ps(a, b, c);
13 __m128d test_mm_macc_pd(__m128d a, __m128d b, __m128d c) {
14 // CHECK-LABEL: test_mm_macc_pd
15 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
16 return _mm_macc_pd(a, b, c);
19 __m128 test_mm_macc_ss(__m128 a, __m128 b, __m128 c) {
20 // CHECK-LABEL: test_mm_macc_ss
21 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
22 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
23 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
24 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float %{{.*}})
25 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
26 return _mm_macc_ss(a, b, c);
29 __m128d test_mm_macc_sd(__m128d a, __m128d b, __m128d c) {
30 // CHECK-LABEL: test_mm_macc_sd
31 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
32 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
33 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
34 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double %{{.*}})
35 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
36 return _mm_macc_sd(a, b, c);
39 __m128 test_mm_msub_ps(__m128 a, __m128 b, __m128 c) {
40 // CHECK-LABEL: test_mm_msub_ps
41 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
42 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
43 return _mm_msub_ps(a, b, c);
46 __m128d test_mm_msub_pd(__m128d a, __m128d b, __m128d c) {
47 // CHECK-LABEL: test_mm_msub_pd
48 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
49 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
50 return _mm_msub_pd(a, b, c);
53 __m128 test_mm_msub_ss(__m128 a, __m128 b, __m128 c) {
54 // CHECK-LABEL: test_mm_msub_ss
55 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
56 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
57 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
58 // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG]], i64 0
59 // CHECK: call float @llvm.fma.f32(float %{{.*}}, float %{{.*}}, float [[C]])
60 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
61 return _mm_msub_ss(a, b, c);
64 __m128d test_mm_msub_sd(__m128d a, __m128d b, __m128d c) {
65 // CHECK-LABEL: test_mm_msub_sd
66 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
67 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
68 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
69 // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG]], i64 0
70 // CHECK: call double @llvm.fma.f64(double %{{.*}}, double %{{.*}}, double [[C]])
71 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
72 return _mm_msub_sd(a, b, c);
75 __m128 test_mm_nmacc_ps(__m128 a, __m128 b, __m128 c) {
76 // CHECK-LABEL: test_mm_nmacc_ps
77 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
78 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
79 return _mm_nmacc_ps(a, b, c);
82 __m128d test_mm_nmacc_pd(__m128d a, __m128d b, __m128d c) {
83 // CHECK-LABEL: test_mm_nmacc_pd
84 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
85 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
86 return _mm_nmacc_pd(a, b, c);
89 __m128 test_mm_nmacc_ss(__m128 a, __m128 b, __m128 c) {
90 // CHECK-LABEL: test_mm_nmacc_ss
91 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
92 // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
93 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
94 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
95 // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float %{{.*}})
96 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
97 return _mm_nmacc_ss(a, b, c);
100 __m128d test_mm_nmacc_sd(__m128d a, __m128d b, __m128d c) {
101 // CHECK-LABEL: test_mm_nmacc_sd
102 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
103 // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
104 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
105 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
106 // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double %{{.*}})
107 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
108 return _mm_nmacc_sd(a, b, c);
111 __m128 test_mm_nmsub_ps(__m128 a, __m128 b, __m128 c) {
112 // CHECK-LABEL: test_mm_nmsub_ps
113 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
114 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
115 // CHECK: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
116 return _mm_nmsub_ps(a, b, c);
119 __m128d test_mm_nmsub_pd(__m128d a, __m128d b, __m128d c) {
120 // CHECK-LABEL: test_mm_nmsub_pd
121 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
122 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
123 // CHECK: call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
124 return _mm_nmsub_pd(a, b, c);
127 __m128 test_mm_nmsub_ss(__m128 a, __m128 b, __m128 c) {
128 // CHECK-LABEL: test_mm_nmsub_ss
129 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
130 // CHECK: [[NEG2:%.+]] = fneg <4 x float> %{{.+}}
131 // CHECK: [[A:%.+]] = extractelement <4 x float> [[NEG]], i64 0
132 // CHECK: extractelement <4 x float> %{{.*}}, i64 0
133 // CHECK: [[C:%.+]] = extractelement <4 x float> [[NEG2]], i64 0
134 // CHECK: call float @llvm.fma.f32(float [[A]], float %{{.*}}, float [[C]])
135 // CHECK: insertelement <4 x float> zeroinitializer, float %{{.*}}, i64 0
136 return _mm_nmsub_ss(a, b, c);
139 __m128d test_mm_nmsub_sd(__m128d a, __m128d b, __m128d c) {
140 // CHECK-LABEL: test_mm_nmsub_sd
141 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
142 // CHECK: [[NEG2:%.+]] = fneg <2 x double> %{{.+}}
143 // CHECK: [[A:%.+]] = extractelement <2 x double> [[NEG]], i64 0
144 // CHECK: extractelement <2 x double> %{{.*}}, i64 0
145 // CHECK: [[C:%.+]] = extractelement <2 x double> [[NEG2]], i64 0
146 // CHECK: call double @llvm.fma.f64(double [[A]], double %{{.*}}, double [[C]])
147 // CHECK: insertelement <2 x double> zeroinitializer, double %{{.*}}, i64 0
148 return _mm_nmsub_sd(a, b, c);
151 __m128 test_mm_maddsub_ps(__m128 a, __m128 b, __m128 c) {
152 // CHECK-LABEL: test_mm_maddsub_ps
153 // CHECK-NOT: fneg
154 // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}})
155 return _mm_maddsub_ps(a, b, c);
158 __m128d test_mm_maddsub_pd(__m128d a, __m128d b, __m128d c) {
159 // CHECK-LABEL: test_mm_maddsub_pd
160 // CHECK-NOT: fneg
161 // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}})
162 return _mm_maddsub_pd(a, b, c);
165 __m128 test_mm_msubadd_ps(__m128 a, __m128 b, __m128 c) {
166 // CHECK-LABEL: test_mm_msubadd_ps
167 // CHECK: [[NEG:%.+]] = fneg <4 x float> %{{.+}}
168 // CHECK: call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[NEG]])
169 return _mm_msubadd_ps(a, b, c);
172 __m128d test_mm_msubadd_pd(__m128d a, __m128d b, __m128d c) {
173 // CHECK-LABEL: test_mm_msubadd_pd
174 // CHECK: [[NEG:%.+]] = fneg <2 x double> %{{.+}}
175 // CHECK: call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[NEG]])
176 return _mm_msubadd_pd(a, b, c);
179 __m256 test_mm256_macc_ps(__m256 a, __m256 b, __m256 c) {
180 // CHECK-LABEL: test_mm256_macc_ps
181 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
182 return _mm256_macc_ps(a, b, c);
185 __m256d test_mm256_macc_pd(__m256d a, __m256d b, __m256d c) {
186 // CHECK-LABEL: test_mm256_macc_pd
187 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
188 return _mm256_macc_pd(a, b, c);
191 __m256 test_mm256_msub_ps(__m256 a, __m256 b, __m256 c) {
192 // CHECK-LABEL: test_mm256_msub_ps
193 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
194 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
195 return _mm256_msub_ps(a, b, c);
198 __m256d test_mm256_msub_pd(__m256d a, __m256d b, __m256d c) {
199 // CHECK-LABEL: test_mm256_msub_pd
200 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
201 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
202 return _mm256_msub_pd(a, b, c);
205 __m256 test_mm256_nmacc_ps(__m256 a, __m256 b, __m256 c) {
206 // CHECK-LABEL: test_mm256_nmacc_ps
207 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
208 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
209 return _mm256_nmacc_ps(a, b, c);
212 __m256d test_mm256_nmacc_pd(__m256d a, __m256d b, __m256d c) {
213 // CHECK-LABEL: test_mm256_nmacc_pd
214 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
215 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
216 return _mm256_nmacc_pd(a, b, c);
219 __m256 test_mm256_nmsub_ps(__m256 a, __m256 b, __m256 c) {
220 // CHECK-LABEL: test_mm256_nmsub_ps
221 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
222 // CHECK: [[NEG2:%.+]] = fneg <8 x float> %{{.*}}
223 // CHECK: call <8 x float> @llvm.fma.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
224 return _mm256_nmsub_ps(a, b, c);
227 __m256d test_mm256_nmsub_pd(__m256d a, __m256d b, __m256d c) {
228 // CHECK-LABEL: test_mm256_nmsub_pd
229 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
230 // CHECK: [[NEG2:%.+]] = fneg <4 x double> %{{.+}}
231 // CHECK: call <4 x double> @llvm.fma.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
232 return _mm256_nmsub_pd(a, b, c);
235 __m256 test_mm256_maddsub_ps(__m256 a, __m256 b, __m256 c) {
236 // CHECK-LABEL: test_mm256_maddsub_ps
237 // CHECK-NOT: fneg
238 // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}})
239 return _mm256_maddsub_ps(a, b, c);
242 __m256d test_mm256_maddsub_pd(__m256d a, __m256d b, __m256d c) {
243 // CHECK-LABEL: test_mm256_maddsub_pd
244 // CHECK-NOT: fneg
245 // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}})
246 return _mm256_maddsub_pd(a, b, c);
249 __m256 test_mm256_msubadd_ps(__m256 a, __m256 b, __m256 c) {
250 // CHECK-LABEL: test_mm256_msubadd_ps
251 // CHECK: [[NEG:%.+]] = fneg <8 x float> %{{.*}}
252 // CHECK: call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x float> [[NEG]])
253 return _mm256_msubadd_ps(a, b, c);
256 __m256d test_mm256_msubadd_pd(__m256d a, __m256d b, __m256d c) {
257 // CHECK-LABEL: test_mm256_msubadd_pd
258 // CHECK: [[NEG:%.+]] = fneg <4 x double> %{{.+}}
259 // CHECK: call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x double> [[NEG]])
260 return _mm256_msubadd_pd(a, b, c);